11787 Kernel needs to be built with retpolines
11788 Kernel needs to generally use RSB stuffing
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
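
This webrev covers usr/src/uts/intel/ia32/ml/copy.s. The substantive change is
that every indirect jump through a register or memory operand is routed through
the INDIRECT_JMP_REG macro so the kernel can be built with retpolines (11787);
RSB stuffing (11788) does not show up in this file. The remaining deltas are a
copyright bump and trailing-whitespace cleanup. As a rough sketch only, and not
the literal expansion of INDIRECT_JMP_REG, a retpoline replacement for
"jmpq *%r10" pins speculative execution in a harmless loop while the real
target is installed as a return address:

        /*
         * Hypothetical retpoline thunk for a jump through %r10; the local
         * labels and exact shape are illustrative, not taken from the source.
         */
        call    2f                      /* pushes the address of 1: */
1:      pause                           /* mispredicted speculation spins here */
        lfence
        jmp     1b
2:      movq    %r10, (%rsp)            /* overwrite return addr with target */
        ret                             /* architecturally jumps to *%r10 */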

--- old/usr/src/uts/intel/ia32/ml/copy.s
+++ new/usr/src/uts/intel/ia32/ml/copy.s
[ 28 lines elided ]
  29   29   */
  30   30  
  31   31  /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32   32  /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T          */
  33   33  /*         All Rights Reserved                                          */
  34   34  
  35   35  /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36   36  /*         All Rights Reserved                                          */
  37   37  
  38   38  /*
  39      - * Copyright (c) 2018 Joyent, Inc.
       39 + * Copyright 2019 Joyent, Inc.
  40   40   */
  41   41  
  42   42  #include <sys/errno.h>
  43   43  #include <sys/asm_linkage.h>
  44   44  
  45   45  #if defined(__lint)
  46   46  #include <sys/types.h>
  47   47  #include <sys/systm.h>
  48   48  #else   /* __lint */
  49   49  #include "assym.h"
[ 71 lines elided ]
 121  121   * we mask off PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
 122  122   * where that gets masked off.
 123  123   */
 124  124  
 125  125  /*
 126  126   * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 127  127   * "rep smovq" for large sizes. Performance data shows that many calls to
 128  128   * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
 129  129   * these small sizes unrolled code is used. For medium sizes loops writing
 130  130   * 64-bytes per loop are used. Transition points were determined experimentally.
 131      - */ 
      131 + */
 132  132  #define BZERO_USE_REP   (1024)
 133  133  #define BCOPY_DFLT_REP  (128)
 134  134  #define BCOPY_NHM_REP   (768)
 135  135  
 136  136  /*
 137  137   * Copy a block of storage, returning an error code if `from' or
 138  138   * `to' takes a kernel pagefault which cannot be resolved.
 139  139   * Returns errno value on pagefault error, 0 if all ok
 140  140   */
 141  141  
[ 30 lines elided ]
 172  172  
 173  173          .globl  kernelbase
 174  174          .globl  postbootkernelbase
 175  175  
 176  176  #if defined(__amd64)
 177  177  
 178  178          ENTRY(kcopy)
 179  179          pushq   %rbp
 180  180          movq    %rsp, %rbp
 181  181  #ifdef DEBUG
 182      -        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
      182 +        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 183  183          jb      0f
 184  184          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 185  185          jnb     1f
 186  186  0:      leaq    .kcopy_panic_msg(%rip), %rdi
 187  187          xorl    %eax, %eax
 188  188          call    panic
 189  189  1:
 190  190  #endif
 191  191          /*
 192  192           * pass lofault value as 4th argument to do_copy_fault
[ 31 lines elided ]
 224  224          movl    postbootkernelbase, %eax
 225  225          cmpl    %eax, ARG_FROM(%ebp)
 226  226          jb      0f
 227  227          cmpl    %eax, ARG_TO(%ebp)
 228  228          jnb     1f
 229  229  0:      pushl   $.kcopy_panic_msg
 230  230          call    panic
 231  231  1:      popl    %ebp
 232  232  #endif
 233  233          lea     _kcopy_copyerr, %eax    /* lofault value */
 234      -        movl    %gs:CPU_THREAD, %edx    
      234 +        movl    %gs:CPU_THREAD, %edx
 235  235  
 236  236  do_copy_fault:
 237  237          pushl   %ebp
 238  238          movl    %esp, %ebp              /* setup stack frame */
 239  239          pushl   %esi
 240  240          pushl   %edi                    /* save registers */
 241  241  
 242  242          movl    T_LOFAULT(%edx), %edi
 243  243          pushl   %edi                    /* save the current lofault */
 244  244          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
[ 58 lines elided ]
 303  303          movq    (src, cnt, 8), %rax;            \
 304  304          movq    0x8(src, cnt, 8), %r8;          \
 305  305          movnti  %rax, (dst, cnt, 8);            \
 306  306          movnti  %r8, 0x8(dst, cnt, 8);          \
 307  307          addq    $2, cnt
 308  308  
 309  309          ENTRY(kcopy_nta)
 310  310          pushq   %rbp
 311  311          movq    %rsp, %rbp
 312  312  #ifdef DEBUG
 313      -        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
      313 +        cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 314  314          jb      0f
 315  315          cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 316  316          jnb     1f
 317  317  0:      leaq    .kcopy_panic_msg(%rip), %rdi
 318  318          xorl    %eax, %eax
 319  319          call    panic
 320  320  1:
 321  321  #endif
 322  322  
 323  323          movq    %gs:CPU_THREAD, %r9
[ 75 lines elided ]
 399  399          ENTRY(kcopy_nta)
 400  400          jmp     kcopy
 401  401  
 402  402          lea     _kcopy_nta_copyerr, %eax        /* lofault value */
 403  403          ALTENTRY(do_copy_fault_nta)
 404  404          pushl   %ebp
 405  405          movl    %esp, %ebp              /* setup stack frame */
 406  406          pushl   %esi
 407  407          pushl   %edi
 408  408  
 409      -        movl    %gs:CPU_THREAD, %edx    
      409 +        movl    %gs:CPU_THREAD, %edx
 410  410          movl    T_LOFAULT(%edx), %edi
 411  411          pushl   %edi                    /* save the current lofault */
 412  412          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 413  413  
 414  414          /* COPY_LOOP_BODY needs to use %esi */
 415  415          movl    ARG_COUNT(%ebp), %ecx
 416  416          movl    ARG_FROM(%ebp), %edi
 417  417          movl    ARG_TO(%ebp), %eax
 418  418          COPY_LOOP_INIT(%edi, %eax, %ecx)
 419  419  1:      COPY_LOOP_BODY(%edi, %eax, %ecx)
[ 28 lines elided ]
 448  448  #else   /* __lint */
 449  449  
 450  450  #if defined(__amd64)
 451  451  
 452  452          ENTRY(bcopy)
 453  453  #ifdef DEBUG
 454  454          orq     %rdx, %rdx              /* %rdx = count */
 455  455          jz      1f
 456  456          cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 457  457          jb      0f
 458      -        cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */         
      458 +        cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 459  459          jnb     1f
 460  460  0:      leaq    .bcopy_panic_msg(%rip), %rdi
 461  461          jmp     call_panic              /* setup stack and call panic */
 462  462  1:
 463  463  #endif
 464  464          /*
 465  465           * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466  466           * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467  467           * uses these registers in future they must be saved and restored.
 468  468           */
[ 6 lines elided ]
 475  475          /*
 476  476           * Performance data shows many caller's copy small buffers. So for
 477  477           * best perf for these sizes unrolled code is used. Store data without
 478  478           * worrying about alignment.
 479  479           */
 480  480          leaq    L(fwdPxQx)(%rip), %r10
 481  481          addq    %rdx, %rdi
 482  482          addq    %rdx, %rsi
 483  483          movslq  (%r10,%rdx,4), %rcx
 484  484          leaq    (%rcx,%r10,1), %r10
 485      -        jmpq    *%r10
      485 +        INDIRECT_JMP_REG(r10)
 486  486  
 487  487          .p2align 4
 488  488  L(fwdPxQx):
 489  489          .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490  490          .int       L(P1Q0)-L(fwdPxQx)
 491  491          .int       L(P2Q0)-L(fwdPxQx)
 492  492          .int       L(P3Q0)-L(fwdPxQx)
 493  493          .int       L(P4Q0)-L(fwdPxQx)
 494  494          .int       L(P5Q0)-L(fwdPxQx)
 495  495          .int       L(P6Q0)-L(fwdPxQx)
 496      -        .int       L(P7Q0)-L(fwdPxQx) 
      496 +        .int       L(P7Q0)-L(fwdPxQx)
 497  497  
 498  498          .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499  499          .int       L(P1Q1)-L(fwdPxQx)
 500  500          .int       L(P2Q1)-L(fwdPxQx)
 501  501          .int       L(P3Q1)-L(fwdPxQx)
 502  502          .int       L(P4Q1)-L(fwdPxQx)
 503  503          .int       L(P5Q1)-L(fwdPxQx)
 504  504          .int       L(P6Q1)-L(fwdPxQx)
 505      -        .int       L(P7Q1)-L(fwdPxQx) 
      505 +        .int       L(P7Q1)-L(fwdPxQx)
 506  506  
 507  507          .int       L(P0Q2)-L(fwdPxQx)   /* 16 */
 508  508          .int       L(P1Q2)-L(fwdPxQx)
 509  509          .int       L(P2Q2)-L(fwdPxQx)
 510  510          .int       L(P3Q2)-L(fwdPxQx)
 511  511          .int       L(P4Q2)-L(fwdPxQx)
 512  512          .int       L(P5Q2)-L(fwdPxQx)
 513  513          .int       L(P6Q2)-L(fwdPxQx)
 514      -        .int       L(P7Q2)-L(fwdPxQx) 
      514 +        .int       L(P7Q2)-L(fwdPxQx)
 515  515  
 516  516          .int       L(P0Q3)-L(fwdPxQx)   /* 24 */
 517  517          .int       L(P1Q3)-L(fwdPxQx)
 518  518          .int       L(P2Q3)-L(fwdPxQx)
 519  519          .int       L(P3Q3)-L(fwdPxQx)
 520  520          .int       L(P4Q3)-L(fwdPxQx)
 521  521          .int       L(P5Q3)-L(fwdPxQx)
 522  522          .int       L(P6Q3)-L(fwdPxQx)
 523      -        .int       L(P7Q3)-L(fwdPxQx) 
      523 +        .int       L(P7Q3)-L(fwdPxQx)
 524  524  
 525  525          .int       L(P0Q4)-L(fwdPxQx)   /* 32 */
 526  526          .int       L(P1Q4)-L(fwdPxQx)
 527  527          .int       L(P2Q4)-L(fwdPxQx)
 528  528          .int       L(P3Q4)-L(fwdPxQx)
 529  529          .int       L(P4Q4)-L(fwdPxQx)
 530  530          .int       L(P5Q4)-L(fwdPxQx)
 531  531          .int       L(P6Q4)-L(fwdPxQx)
 532      -        .int       L(P7Q4)-L(fwdPxQx) 
      532 +        .int       L(P7Q4)-L(fwdPxQx)
 533  533  
 534  534          .int       L(P0Q5)-L(fwdPxQx)   /* 40 */
 535  535          .int       L(P1Q5)-L(fwdPxQx)
 536  536          .int       L(P2Q5)-L(fwdPxQx)
 537  537          .int       L(P3Q5)-L(fwdPxQx)
 538  538          .int       L(P4Q5)-L(fwdPxQx)
 539  539          .int       L(P5Q5)-L(fwdPxQx)
 540  540          .int       L(P6Q5)-L(fwdPxQx)
 541      -        .int       L(P7Q5)-L(fwdPxQx) 
      541 +        .int       L(P7Q5)-L(fwdPxQx)
 542  542  
 543  543          .int       L(P0Q6)-L(fwdPxQx)   /* 48 */
 544  544          .int       L(P1Q6)-L(fwdPxQx)
 545  545          .int       L(P2Q6)-L(fwdPxQx)
 546  546          .int       L(P3Q6)-L(fwdPxQx)
 547  547          .int       L(P4Q6)-L(fwdPxQx)
 548  548          .int       L(P5Q6)-L(fwdPxQx)
 549  549          .int       L(P6Q6)-L(fwdPxQx)
 550      -        .int       L(P7Q6)-L(fwdPxQx) 
      550 +        .int       L(P7Q6)-L(fwdPxQx)
 551  551  
 552  552          .int       L(P0Q7)-L(fwdPxQx)   /* 56 */
 553  553          .int       L(P1Q7)-L(fwdPxQx)
 554  554          .int       L(P2Q7)-L(fwdPxQx)
 555  555          .int       L(P3Q7)-L(fwdPxQx)
 556  556          .int       L(P4Q7)-L(fwdPxQx)
 557  557          .int       L(P5Q7)-L(fwdPxQx)
 558  558          .int       L(P6Q7)-L(fwdPxQx)
 559      -        .int       L(P7Q7)-L(fwdPxQx) 
      559 +        .int       L(P7Q7)-L(fwdPxQx)
 560  560  
 561  561          .int       L(P0Q8)-L(fwdPxQx)   /* 64 */
 562  562          .int       L(P1Q8)-L(fwdPxQx)
 563  563          .int       L(P2Q8)-L(fwdPxQx)
 564  564          .int       L(P3Q8)-L(fwdPxQx)
 565  565          .int       L(P4Q8)-L(fwdPxQx)
 566  566          .int       L(P5Q8)-L(fwdPxQx)
 567  567          .int       L(P6Q8)-L(fwdPxQx)
 568  568          .int       L(P7Q8)-L(fwdPxQx)
 569  569  
[ 27 lines elided ]
 597  597          mov    %r8, -0x20(%rsi)
 598  598  L(P0Q3):
 599  599          mov    -0x18(%rdi), %rcx
 600  600          mov    %rcx, -0x18(%rsi)
 601  601  L(P0Q2):
 602  602          mov    -0x10(%rdi), %r10
 603  603          mov    %r10, -0x10(%rsi)
 604  604  L(P0Q1):
 605  605          mov    -0x8(%rdi), %r8
 606  606          mov    %r8, -0x8(%rsi)
 607      -L(P0Q0):                                   
 608      -        ret   
      607 +L(P0Q0):
      608 +        ret
 609  609  
 610  610          .p2align 4
 611  611  L(P1Q9):
 612  612          mov    -0x49(%rdi), %r8
 613  613          mov    %r8, -0x49(%rsi)
 614  614  L(P1Q8):
 615  615          mov    -0x41(%rdi), %rcx
 616  616          mov    %rcx, -0x41(%rsi)
 617  617  L(P1Q7):
 618  618          mov    -0x39(%rdi), %r10
[ 12 lines elided ]
 631  631          mov    %r8, -0x19(%rsi)
 632  632  L(P1Q2):
 633  633          mov    -0x11(%rdi), %rcx
 634  634          mov    %rcx, -0x11(%rsi)
 635  635  L(P1Q1):
 636  636          mov    -0x9(%rdi), %r10
 637  637          mov    %r10, -0x9(%rsi)
 638  638  L(P1Q0):
 639  639          movzbq -0x1(%rdi), %r8
 640  640          mov    %r8b, -0x1(%rsi)
 641      -        ret   
      641 +        ret
 642  642  
 643  643          .p2align 4
 644  644  L(P2Q9):
 645  645          mov    -0x4a(%rdi), %r8
 646  646          mov    %r8, -0x4a(%rsi)
 647  647  L(P2Q8):
 648  648          mov    -0x42(%rdi), %rcx
 649  649          mov    %rcx, -0x42(%rsi)
 650  650  L(P2Q7):
 651  651          mov    -0x3a(%rdi), %r10
[ 12 lines elided ]
 664  664          mov    %r8, -0x1a(%rsi)
 665  665  L(P2Q2):
 666  666          mov    -0x12(%rdi), %rcx
 667  667          mov    %rcx, -0x12(%rsi)
 668  668  L(P2Q1):
 669  669          mov    -0xa(%rdi), %r10
 670  670          mov    %r10, -0xa(%rsi)
 671  671  L(P2Q0):
 672  672          movzwq -0x2(%rdi), %r8
 673  673          mov    %r8w, -0x2(%rsi)
 674      -        ret   
      674 +        ret
 675  675  
 676  676          .p2align 4
 677  677  L(P3Q9):
 678  678          mov    -0x4b(%rdi), %r8
 679  679          mov    %r8, -0x4b(%rsi)
 680  680  L(P3Q8):
 681  681          mov    -0x43(%rdi), %rcx
 682  682          mov    %rcx, -0x43(%rsi)
 683  683  L(P3Q7):
 684  684          mov    -0x3b(%rdi), %r10
[ 10 lines elided ]
 695  695  L(P3Q3):
 696  696          mov    -0x1b(%rdi), %r8
 697  697          mov    %r8, -0x1b(%rsi)
 698  698  L(P3Q2):
 699  699          mov    -0x13(%rdi), %rcx
 700  700          mov    %rcx, -0x13(%rsi)
 701  701  L(P3Q1):
 702  702          mov    -0xb(%rdi), %r10
 703  703          mov    %r10, -0xb(%rsi)
 704  704          /*
 705      -         * These trailing loads/stores have to do all their loads 1st, 
      705 +         * These trailing loads/stores have to do all their loads 1st,
 706  706           * then do the stores.
 707  707           */
 708  708  L(P3Q0):
 709  709          movzwq -0x3(%rdi), %r8
 710  710          movzbq -0x1(%rdi), %r10
 711  711          mov    %r8w, -0x3(%rsi)
 712  712          mov    %r10b, -0x1(%rsi)
 713      -        ret   
      713 +        ret
 714  714  
 715  715          .p2align 4
 716  716  L(P4Q9):
 717  717          mov    -0x4c(%rdi), %r8
 718  718          mov    %r8, -0x4c(%rsi)
 719  719  L(P4Q8):
 720  720          mov    -0x44(%rdi), %rcx
 721  721          mov    %rcx, -0x44(%rsi)
 722  722  L(P4Q7):
 723  723          mov    -0x3c(%rdi), %r10
[ 12 lines elided ]
 736  736          mov    %r8, -0x1c(%rsi)
 737  737  L(P4Q2):
 738  738          mov    -0x14(%rdi), %rcx
 739  739          mov    %rcx, -0x14(%rsi)
 740  740  L(P4Q1):
 741  741          mov    -0xc(%rdi), %r10
 742  742          mov    %r10, -0xc(%rsi)
 743  743  L(P4Q0):
 744  744          mov    -0x4(%rdi), %r8d
 745  745          mov    %r8d, -0x4(%rsi)
 746      -        ret   
      746 +        ret
 747  747  
 748  748          .p2align 4
 749  749  L(P5Q9):
 750  750          mov    -0x4d(%rdi), %r8
 751  751          mov    %r8, -0x4d(%rsi)
 752  752  L(P5Q8):
 753  753          mov    -0x45(%rdi), %rcx
 754  754          mov    %rcx, -0x45(%rsi)
 755  755  L(P5Q7):
 756  756          mov    -0x3d(%rdi), %r10
[ 14 lines elided ]
 771  771          mov    -0x15(%rdi), %rcx
 772  772          mov    %rcx, -0x15(%rsi)
 773  773  L(P5Q1):
 774  774          mov    -0xd(%rdi), %r10
 775  775          mov    %r10, -0xd(%rsi)
 776  776  L(P5Q0):
 777  777          mov    -0x5(%rdi), %r8d
 778  778          movzbq -0x1(%rdi), %r10
 779  779          mov    %r8d, -0x5(%rsi)
 780  780          mov    %r10b, -0x1(%rsi)
 781      -        ret   
      781 +        ret
 782  782  
 783  783          .p2align 4
 784  784  L(P6Q9):
 785  785          mov    -0x4e(%rdi), %r8
 786  786          mov    %r8, -0x4e(%rsi)
 787  787  L(P6Q8):
 788  788          mov    -0x46(%rdi), %rcx
 789  789          mov    %rcx, -0x46(%rsi)
 790  790  L(P6Q7):
 791  791          mov    -0x3e(%rdi), %r10
[ 14 lines elided ]
 806  806          mov    -0x16(%rdi), %rcx
 807  807          mov    %rcx, -0x16(%rsi)
 808  808  L(P6Q1):
 809  809          mov    -0xe(%rdi), %r10
 810  810          mov    %r10, -0xe(%rsi)
 811  811  L(P6Q0):
 812  812          mov    -0x6(%rdi), %r8d
 813  813          movzwq -0x2(%rdi), %r10
 814  814          mov    %r8d, -0x6(%rsi)
 815  815          mov    %r10w, -0x2(%rsi)
 816      -        ret   
      816 +        ret
 817  817  
 818  818          .p2align 4
 819  819  L(P7Q9):
 820  820          mov    -0x4f(%rdi), %r8
 821  821          mov    %r8, -0x4f(%rsi)
 822  822  L(P7Q8):
 823  823          mov    -0x47(%rdi), %rcx
 824  824          mov    %rcx, -0x47(%rsi)
 825  825  L(P7Q7):
 826  826          mov    -0x3f(%rdi), %r10
[ 16 lines elided ]
 843  843  L(P7Q1):
 844  844          mov    -0xf(%rdi), %r10
 845  845          mov    %r10, -0xf(%rsi)
 846  846  L(P7Q0):
 847  847          mov    -0x7(%rdi), %r8d
 848  848          movzwq -0x3(%rdi), %r10
 849  849          movzbq -0x1(%rdi), %rcx
 850  850          mov    %r8d, -0x7(%rsi)
 851  851          mov    %r10w, -0x3(%rsi)
 852  852          mov    %cl, -0x1(%rsi)
 853      -        ret   
      853 +        ret
 854  854  
 855  855          /*
 856  856           * For large sizes rep smovq is fastest.
 857  857           * Transition point determined experimentally as measured on
 858  858           * Intel Xeon processors (incl. Nehalem and previous generations) and
 859  859           * AMD Opteron. The transition value is patched at boot time to avoid
 860  860           * memory reference hit.
 861  861           */
 862  862          .globl bcopy_patch_start
 863  863  bcopy_patch_start:
[ 67 lines elided ]
 931  931  
 932  932          /*
 933  933           * Copy remaining bytes (0-63)
 934  934           */
 935  935  L(do_remainder):
 936  936          leaq    L(fwdPxQx)(%rip), %r10
 937  937          addq    %rdx, %rdi
 938  938          addq    %rdx, %rsi
 939  939          movslq  (%r10,%rdx,4), %rcx
 940  940          leaq    (%rcx,%r10,1), %r10
 941      -        jmpq    *%r10
      941 +        INDIRECT_JMP_REG(r10)
 942  942  
 943  943          /*
 944  944           * Use rep smovq. Clear remainder via unrolled code
 945  945           */
 946  946          .p2align 4
 947  947  L(use_rep):
 948  948          xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949  949          movq    %rdx, %rcx              /* %rcx = count */
 950  950          shrq    $3, %rcx                /* 8-byte word count */
 951  951          rep
[ 93 lines elided ]
1045 1045  
1046 1046          ENTRY(kzero)
1047 1047  #ifdef DEBUG
1048 1048          cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1049 1049          jnb     0f
1050 1050          leaq    .kzero_panic_msg(%rip), %rdi
1051 1051          jmp     call_panic              /* setup stack and call panic */
1052 1052  0:
1053 1053  #endif
1054 1054          /*
1055      -         * pass lofault value as 3rd argument for fault return 
     1055 +         * pass lofault value as 3rd argument for fault return
1056 1056           */
1057 1057          leaq    _kzeroerr(%rip), %rdx
1058 1058  
1059 1059          movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
1060 1060          movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
1061 1061          movq    %rdx, T_LOFAULT(%r9)    /* new lofault */
1062 1062          call    bzero_altentry
1063 1063          xorl    %eax, %eax
1064 1064          movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1065 1065          ret
[ 22 lines elided ]
1088 1088          pushl   $.kzero_panic_msg
1089 1089          call    panic
1090 1090  0:      popl    %ebp
1091 1091  #endif
1092 1092          lea     _kzeroerr, %eax         /* kzeroerr is lofault value */
1093 1093  
1094 1094          pushl   %ebp                    /* save stack base */
1095 1095          movl    %esp, %ebp              /* set new stack base */
1096 1096          pushl   %edi                    /* save %edi */
1097 1097  
1098      -        mov     %gs:CPU_THREAD, %edx    
     1098 +        mov     %gs:CPU_THREAD, %edx
1099 1099          movl    T_LOFAULT(%edx), %edi
1100 1100          pushl   %edi                    /* save the current lofault */
1101 1101          movl    %eax, T_LOFAULT(%edx)   /* new lofault */
1102 1102  
1103 1103          movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1104 1104          movl    ARG_ADDR(%ebp), %edi    /* %edi <- address of bytes to clear */
1105 1105          shrl    $2, %ecx                /* Count of double words to zero */
1106 1106          xorl    %eax, %eax              /* sstol val */
1107 1107          rep
1108 1108            sstol                 /* %ecx contains words to clear (%eax=0) */
[ 54 lines elided ]
1163 1163  
1164 1164          /*
1165 1165           * Performance data shows many caller's are zeroing small buffers. So
1166 1166           * for best perf for these sizes unrolled code is used. Store zeros
1167 1167           * without worrying about alignment.
1168 1168           */
1169 1169          leaq    L(setPxQx)(%rip), %r10
1170 1170          addq    %rsi, %rdi
1171 1171          movslq  (%r10,%rsi,4), %rcx
1172 1172          leaq    (%rcx,%r10,1), %r10
1173      -        jmpq    *%r10
     1173 +        INDIRECT_JMP_REG(r10)
1174 1174  
1175 1175          .p2align 4
1176 1176  L(setPxQx):
1177 1177          .int       L(P0Q0)-L(setPxQx)   /* 0 */
1178 1178          .int       L(P1Q0)-L(setPxQx)
1179 1179          .int       L(P2Q0)-L(setPxQx)
1180 1180          .int       L(P3Q0)-L(setPxQx)
1181 1181          .int       L(P4Q0)-L(setPxQx)
1182 1182          .int       L(P5Q0)-L(setPxQx)
1183 1183          .int       L(P6Q0)-L(setPxQx)
1184      -        .int       L(P7Q0)-L(setPxQx) 
     1184 +        .int       L(P7Q0)-L(setPxQx)
1185 1185  
1186 1186          .int       L(P0Q1)-L(setPxQx)   /* 8 */
1187 1187          .int       L(P1Q1)-L(setPxQx)
1188 1188          .int       L(P2Q1)-L(setPxQx)
1189 1189          .int       L(P3Q1)-L(setPxQx)
1190 1190          .int       L(P4Q1)-L(setPxQx)
1191 1191          .int       L(P5Q1)-L(setPxQx)
1192 1192          .int       L(P6Q1)-L(setPxQx)
1193      -        .int       L(P7Q1)-L(setPxQx) 
     1193 +        .int       L(P7Q1)-L(setPxQx)
1194 1194  
1195 1195          .int       L(P0Q2)-L(setPxQx)   /* 16 */
1196 1196          .int       L(P1Q2)-L(setPxQx)
1197 1197          .int       L(P2Q2)-L(setPxQx)
1198 1198          .int       L(P3Q2)-L(setPxQx)
1199 1199          .int       L(P4Q2)-L(setPxQx)
1200 1200          .int       L(P5Q2)-L(setPxQx)
1201 1201          .int       L(P6Q2)-L(setPxQx)
1202      -        .int       L(P7Q2)-L(setPxQx) 
     1202 +        .int       L(P7Q2)-L(setPxQx)
1203 1203  
1204 1204          .int       L(P0Q3)-L(setPxQx)   /* 24 */
1205 1205          .int       L(P1Q3)-L(setPxQx)
1206 1206          .int       L(P2Q3)-L(setPxQx)
1207 1207          .int       L(P3Q3)-L(setPxQx)
1208 1208          .int       L(P4Q3)-L(setPxQx)
1209 1209          .int       L(P5Q3)-L(setPxQx)
1210 1210          .int       L(P6Q3)-L(setPxQx)
1211      -        .int       L(P7Q3)-L(setPxQx) 
     1211 +        .int       L(P7Q3)-L(setPxQx)
1212 1212  
1213 1213          .int       L(P0Q4)-L(setPxQx)   /* 32 */
1214 1214          .int       L(P1Q4)-L(setPxQx)
1215 1215          .int       L(P2Q4)-L(setPxQx)
1216 1216          .int       L(P3Q4)-L(setPxQx)
1217 1217          .int       L(P4Q4)-L(setPxQx)
1218 1218          .int       L(P5Q4)-L(setPxQx)
1219 1219          .int       L(P6Q4)-L(setPxQx)
1220      -        .int       L(P7Q4)-L(setPxQx) 
     1220 +        .int       L(P7Q4)-L(setPxQx)
1221 1221  
1222 1222          .int       L(P0Q5)-L(setPxQx)   /* 40 */
1223 1223          .int       L(P1Q5)-L(setPxQx)
1224 1224          .int       L(P2Q5)-L(setPxQx)
1225 1225          .int       L(P3Q5)-L(setPxQx)
1226 1226          .int       L(P4Q5)-L(setPxQx)
1227 1227          .int       L(P5Q5)-L(setPxQx)
1228 1228          .int       L(P6Q5)-L(setPxQx)
1229      -        .int       L(P7Q5)-L(setPxQx) 
     1229 +        .int       L(P7Q5)-L(setPxQx)
1230 1230  
1231 1231          .int       L(P0Q6)-L(setPxQx)   /* 48 */
1232 1232          .int       L(P1Q6)-L(setPxQx)
1233 1233          .int       L(P2Q6)-L(setPxQx)
1234 1234          .int       L(P3Q6)-L(setPxQx)
1235 1235          .int       L(P4Q6)-L(setPxQx)
1236 1236          .int       L(P5Q6)-L(setPxQx)
1237 1237          .int       L(P6Q6)-L(setPxQx)
1238      -        .int       L(P7Q6)-L(setPxQx) 
     1238 +        .int       L(P7Q6)-L(setPxQx)
1239 1239  
1240 1240          .int       L(P0Q7)-L(setPxQx)   /* 56 */
1241 1241          .int       L(P1Q7)-L(setPxQx)
1242 1242          .int       L(P2Q7)-L(setPxQx)
1243 1243          .int       L(P3Q7)-L(setPxQx)
1244 1244          .int       L(P4Q7)-L(setPxQx)
1245 1245          .int       L(P5Q7)-L(setPxQx)
1246 1246          .int       L(P6Q7)-L(setPxQx)
1247      -        .int       L(P7Q7)-L(setPxQx) 
     1247 +        .int       L(P7Q7)-L(setPxQx)
1248 1248  
1249 1249          .int       L(P0Q8)-L(setPxQx)   /* 64 */
1250 1250          .int       L(P1Q8)-L(setPxQx)
1251 1251          .int       L(P2Q8)-L(setPxQx)
1252 1252          .int       L(P3Q8)-L(setPxQx)
1253 1253          .int       L(P4Q8)-L(setPxQx)
1254 1254          .int       L(P5Q8)-L(setPxQx)
1255 1255          .int       L(P6Q8)-L(setPxQx)
1256 1256          .int       L(P7Q8)-L(setPxQx)
1257 1257  
[ 9 lines elided ]
1267 1267          .p2align 4
1268 1268  L(P0Q9): mov    %rax, -0x48(%rdi)
1269 1269  L(P0Q8): mov    %rax, -0x40(%rdi)
1270 1270  L(P0Q7): mov    %rax, -0x38(%rdi)
1271 1271  L(P0Q6): mov    %rax, -0x30(%rdi)
1272 1272  L(P0Q5): mov    %rax, -0x28(%rdi)
1273 1273  L(P0Q4): mov    %rax, -0x20(%rdi)
1274 1274  L(P0Q3): mov    %rax, -0x18(%rdi)
1275 1275  L(P0Q2): mov    %rax, -0x10(%rdi)
1276 1276  L(P0Q1): mov    %rax, -0x8(%rdi)
1277      -L(P0Q0): 
     1277 +L(P0Q0):
1278 1278           ret
1279 1279  
1280 1280          .p2align 4
1281 1281  L(P1Q9): mov    %rax, -0x49(%rdi)
1282 1282  L(P1Q8): mov    %rax, -0x41(%rdi)
1283 1283  L(P1Q7): mov    %rax, -0x39(%rdi)
1284 1284  L(P1Q6): mov    %rax, -0x31(%rdi)
1285 1285  L(P1Q5): mov    %rax, -0x29(%rdi)
1286 1286  L(P1Q4): mov    %rax, -0x21(%rdi)
1287 1287  L(P1Q3): mov    %rax, -0x19(%rdi)
[ 127 lines elided ]
1415 1415          cmp     $BZERO_USE_REP, %rsi
1416 1416          ja      L(use_rep)
1417 1417  
1418 1418          /*
1419 1419           * zero 64-bytes per loop
1420 1420           */
1421 1421          .p2align 4
1422 1422  L(bzero_loop):
1423 1423          leaq    -0x40(%rsi), %rsi
1424 1424          cmpq    $0x40, %rsi
1425      -        movq    %rax, (%rdi) 
1426      -        movq    %rax, 0x8(%rdi) 
1427      -        movq    %rax, 0x10(%rdi) 
1428      -        movq    %rax, 0x18(%rdi) 
1429      -        movq    %rax, 0x20(%rdi) 
1430      -        movq    %rax, 0x28(%rdi) 
1431      -        movq    %rax, 0x30(%rdi) 
1432      -        movq    %rax, 0x38(%rdi) 
     1425 +        movq    %rax, (%rdi)
     1426 +        movq    %rax, 0x8(%rdi)
     1427 +        movq    %rax, 0x10(%rdi)
     1428 +        movq    %rax, 0x18(%rdi)
     1429 +        movq    %rax, 0x20(%rdi)
     1430 +        movq    %rax, 0x28(%rdi)
     1431 +        movq    %rax, 0x30(%rdi)
     1432 +        movq    %rax, 0x38(%rdi)
1433 1433          leaq    0x40(%rdi), %rdi
1434 1434          jae     L(bzero_loop)
1435 1435  
1436 1436          /*
1437 1437           * Clear any remaining bytes..
1438 1438           */
1439 1439  9:
1440 1440          leaq    L(setPxQx)(%rip), %r10
1441 1441          addq    %rsi, %rdi
1442 1442          movslq  (%r10,%rsi,4), %rcx
1443 1443          leaq    (%rcx,%r10,1), %r10
1444      -        jmpq    *%r10
     1444 +        INDIRECT_JMP_REG(r10)
1445 1445  
1446 1446          /*
1447 1447           * Use rep sstoq. Clear any remainder via unrolled code
1448 1448           */
1449 1449          .p2align 4
1450 1450  L(use_rep):
1451 1451          movq    %rsi, %rcx              /* get size in bytes */
1452 1452          shrq    $3, %rcx                /* count of 8-byte words to zero */
1453 1453          rep
1454 1454            sstoq                         /* %rcx = words to clear (%rax=0) */
[ 102 lines elided ]
1557 1557          leaq    _copyin_err(%rip), %rcx
1558 1558  
1559 1559          movq    %gs:CPU_THREAD, %r9
1560 1560          cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1561 1561          jae     3f                      /* take copyop if uaddr > kernelbase */
1562 1562          SMAP_DISABLE_INSTR(0)
1563 1563          jmp     do_copy_fault           /* Takes care of leave for us */
1564 1564  
1565 1565  _copyin_err:
1566 1566          SMAP_ENABLE_INSTR(2)
1567      -        movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */  
     1567 +        movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1568 1568          addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1569 1569  3:
1570 1570          movq    T_COPYOPS(%r9), %rax
1571 1571          cmpq    $0, %rax
1572 1572          jz      2f
1573 1573          /*
1574 1574           * reload args for the copyop
1575 1575           */
1576 1576          movq    (%rsp), %rdi
1577 1577          movq    0x8(%rsp), %rsi
1578 1578          movq    0x10(%rsp), %rdx
1579 1579          leave
1580      -        jmp     *CP_COPYIN(%rax)
     1580 +        movq    CP_COPYIN(%rax), %rax
     1581 +        INDIRECT_JMP_REG(rax)
1581 1582  
1582      -2:      movl    $-1, %eax       
     1583 +2:      movl    $-1, %eax
1583 1584          leave
1584 1585          ret
1585 1586          SET_SIZE(copyin)
1586 1587  
1587 1588  #elif defined(__i386)
1588 1589  
1589 1590  #define ARG_UADDR       4
1590 1591  #define ARG_KADDR       8
1591 1592  
1592 1593          ENTRY(copyin)
[ 80 lines elided ]
1673 1674          leaq    _xcopyin_err(%rip), %rcx        /* doesn't set rflags */
1674 1675          jnz     6f                      /* use regular access */
1675 1676          /*
1676 1677           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1677 1678           */
1678 1679          cmpq    $XCOPY_MIN_SIZE, %rdx
1679 1680          jae     5f
1680 1681  6:
1681 1682          SMAP_DISABLE_INSTR(1)
1682 1683          jmp     do_copy_fault
1683      -        
     1684 +
1684 1685          /*
1685 1686           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1686 1687           * count is COUNT_ALIGN_SIZE aligned.
1687 1688           */
1688 1689  5:
1689 1690          movq    %rdi, %r10
1690 1691          orq     %rsi, %r10
1691 1692          andq    $NTA_ALIGN_MASK, %r10
1692 1693          orq     %rdx, %r10
1693 1694          andq    $COUNT_ALIGN_MASK, %r10
1694      -        jnz     6b      
     1695 +        jnz     6b
1695 1696          leaq    _xcopyin_nta_err(%rip), %rcx    /* doesn't set rflags */
1696 1697          SMAP_DISABLE_INSTR(2)
1697 1698          jmp     do_copy_fault_nta       /* use non-temporal access */
1698      -        
     1699 +
1699 1700  4:
1700 1701          movl    $EFAULT, %eax
1701 1702          jmp     3f
1702 1703  
1703 1704          /*
1704 1705           * A fault during do_copy_fault or do_copy_fault_nta is
1705 1706           * indicated through an errno value in %rax and we iret from the
1706 1707           * trap handler to here.
1707 1708           */
1708 1709  _xcopyin_err:
[ 6 lines elided ]
1715 1716          cmpq    $0, %r8
1716 1717          jz      2f
1717 1718  
1718 1719          /*
1719 1720           * reload args for the copyop
1720 1721           */
1721 1722          movq    (%rsp), %rdi
1722 1723          movq    0x8(%rsp), %rsi
1723 1724          movq    0x10(%rsp), %rdx
1724 1725          leave
1725      -        jmp     *CP_XCOPYIN(%r8)
     1726 +        movq    CP_XCOPYIN(%r8), %r8
     1727 +        INDIRECT_JMP_REG(r8)
1726 1728  
1727 1729  2:      leave
1728 1730          ret
1729 1731          SET_SIZE(xcopyin_nta)
1730 1732  
1731 1733  #elif defined(__i386)
1732 1734  
1733 1735  #define ARG_UADDR       4
1734 1736  #define ARG_KADDR       8
1735 1737  #define ARG_COUNT       12
[ 12 lines elided ]
1748 1750          jz      do_copy_fault
1749 1751  
1750 1752          cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
1751 1753          jnz     do_copy_fault
1752 1754  
1753 1755          /*
1754 1756           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1755 1757           */
1756 1758          cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1757 1759          jb      do_copy_fault
1758      -        
     1760 +
1759 1761          /*
1760 1762           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1761 1763           * count is COUNT_ALIGN_SIZE aligned.
1762 1764           */
1763 1765          movl    ARG_UADDR(%esp), %ecx
1764 1766          orl     ARG_KADDR(%esp), %ecx
1765 1767          andl    $NTA_ALIGN_MASK, %ecx
1766 1768          orl     ARG_COUNT(%esp), %ecx
1767 1769          andl    $COUNT_ALIGN_MASK, %ecx
1768 1770          jnz     do_copy_fault
[ 14 lines elided ]
1783 1785          popl    %edi
1784 1786          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1785 1787          popl    %esi
1786 1788          popl    %ebp
1787 1789  3:
1788 1790          cmpl    $0, T_COPYOPS(%edx)
1789 1791          jz      2f
1790 1792          movl    T_COPYOPS(%edx), %eax
1791 1793          jmp     *CP_XCOPYIN(%eax)
1792 1794  
1793      -2:      rep;    ret     /* use 2 byte return instruction when branch target */
     1795 +2:      rep;    ret     /* use 2 byte return instruction when branch target */
1794 1796                          /* AMD Software Optimization Guide - Section 6.2 */
1795 1797          SET_SIZE(xcopyin_nta)
1796 1798  
1797 1799  #undef  ARG_UADDR
1798 1800  #undef  ARG_KADDR
1799 1801  #undef  ARG_COUNT
1800 1802  #undef  ARG_CACHED
1801 1803  
1802 1804  #endif  /* __i386 */
1803 1805  #endif  /* __lint */
[ 54 lines elided ]
1858 1860          cmpq    $0, %rax
1859 1861          jz      2f
1860 1862  
1861 1863          /*
1862 1864           * reload args for the copyop
1863 1865           */
1864 1866          movq    (%rsp), %rdi
1865 1867          movq    0x8(%rsp), %rsi
1866 1868          movq    0x10(%rsp), %rdx
1867 1869          leave
1868      -        jmp     *CP_COPYOUT(%rax)
     1870 +        movq    CP_COPYOUT(%rax), %rax
     1871 +        INDIRECT_JMP_REG(rax)
1869 1872  
1870 1873  2:      movl    $-1, %eax
1871 1874          leave
1872 1875          ret
1873 1876          SET_SIZE(copyout)
1874 1877  
1875 1878  #elif defined(__i386)
1876 1879  
1877 1880  #define ARG_KADDR       4
1878 1881  #define ARG_UADDR       8
[ 7 lines elided ]
1886 1889          movl    %esp, %ebp
1887 1890          pushl   $.copyout_panic_msg
1888 1891          call    panic
1889 1892  1:
1890 1893  #endif
1891 1894          lea     _copyout_err, %eax
1892 1895          movl    %gs:CPU_THREAD, %edx
1893 1896          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1894 1897          jb      do_copy_fault
1895 1898          jmp     3f
1896      -        
     1899 +
1897 1900  _copyout_err:
1898 1901          popl    %ecx
1899 1902          popl    %edi
1900 1903          movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1901 1904          popl    %esi
1902 1905          popl    %ebp
1903 1906  3:
1904 1907          movl    T_COPYOPS(%edx), %eax
1905 1908          cmpl    $0, %eax
1906 1909          jz      2f
[ 52 lines elided ]
1959 1962          leaq    _xcopyout_err(%rip), %rcx
1960 1963          jnz     6f
1961 1964          /*
1962 1965           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1963 1966           */
1964 1967          cmpq    $XCOPY_MIN_SIZE, %rdx
1965 1968          jae     5f
1966 1969  6:
1967 1970          SMAP_DISABLE_INSTR(4)
1968 1971          jmp     do_copy_fault
1969      -        
     1972 +
1970 1973          /*
1971 1974           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1972 1975           * count is COUNT_ALIGN_SIZE aligned.
1973 1976           */
1974 1977  5:
1975 1978          movq    %rdi, %r10
1976 1979          orq     %rsi, %r10
1977 1980          andq    $NTA_ALIGN_MASK, %r10
1978 1981          orq     %rdx, %r10
1979 1982          andq    $COUNT_ALIGN_MASK, %r10
1980      -        jnz     6b      
     1983 +        jnz     6b
1981 1984          leaq    _xcopyout_nta_err(%rip), %rcx
1982 1985          SMAP_DISABLE_INSTR(5)
1983 1986          call    do_copy_fault_nta
1984 1987          SMAP_ENABLE_INSTR(5)
1985 1988          ret
1986 1989  
1987 1990  4:
1988 1991          movl    $EFAULT, %eax
1989 1992          jmp     3f
1990 1993  
[ 12 lines elided ]
2003 2006          cmpq    $0, %r8
2004 2007          jz      2f
2005 2008  
2006 2009          /*
2007 2010           * reload args for the copyop
2008 2011           */
2009 2012          movq    (%rsp), %rdi
2010 2013          movq    0x8(%rsp), %rsi
2011 2014          movq    0x10(%rsp), %rdx
2012 2015          leave
2013      -        jmp     *CP_XCOPYOUT(%r8)
     2016 +        movq    CP_XCOPYOUT(%r8), %r8
     2017 +        INDIRECT_JMP_REG(r8)
2014 2018  
2015 2019  2:      leave
2016 2020          ret
2017 2021          SET_SIZE(xcopyout_nta)
2018 2022  
2019 2023  #elif defined(__i386)
2020 2024  
2021 2025  #define ARG_KADDR       4
2022 2026  #define ARG_UADDR       8
2023 2027  #define ARG_COUNT       12
[ 10 lines elided ]
2034 2038          jz      do_copy_fault
2035 2039  
2036 2040          cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
2037 2041          jnz     do_copy_fault
2038 2042  
2039 2043          /*
2040 2044           * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2041 2045           */
2042 2046          cmpl    $XCOPY_MIN_SIZE, %edx
2043 2047          jb      do_copy_fault
2044      -        
     2048 +
2045 2049          /*
2046 2050           * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2047 2051           * count is COUNT_ALIGN_SIZE aligned.
2048 2052           */
2049 2053          movl    ARG_UADDR(%esp), %ecx
2050 2054          orl     ARG_KADDR(%esp), %ecx
2051 2055          andl    $NTA_ALIGN_MASK, %ecx
2052 2056          orl     ARG_COUNT(%esp), %ecx
2053 2057          andl    $COUNT_ALIGN_MASK, %ecx
2054 2058          jnz     do_copy_fault
[ 135 lines elided ]
2190 2194  #endif
2191 2195          /* get the current lofault address */
2192 2196          movl    %gs:CPU_THREAD, %eax
2193 2197          movl    T_LOFAULT(%eax), %eax
2194 2198  do_copystr:
2195 2199          pushl   %ebp                    /* setup stack frame */
2196 2200          movl    %esp, %ebp
2197 2201          pushl   %ebx                    /* save registers */
2198 2202          pushl   %edi
2199 2203  
2200      -        movl    %gs:CPU_THREAD, %ebx    
     2204 +        movl    %gs:CPU_THREAD, %ebx
2201 2205          movl    T_LOFAULT(%ebx), %edi
2202 2206          pushl   %edi                    /* save the current lofault */
2203 2207          movl    %eax, T_LOFAULT(%ebx)   /* new lofault */
2204 2208  
2205 2209          movl    ARG_MAXLEN(%ebp), %ecx
2206 2210          cmpl    $0, %ecx
2207 2211          je      copystr_enametoolong    /* maxlength == 0 */
2208 2212  
2209 2213          movl    ARG_FROM(%ebp), %ebx    /* source address */
2210 2214          movl    ARG_TO(%ebp), %edx      /* destination address */
2211 2215  
2212 2216  copystr_loop:
2213 2217          decl    %ecx
2214 2218          movb    (%ebx), %al
2215      -        incl    %ebx    
     2219 +        incl    %ebx
2216 2220          movb    %al, (%edx)
2217 2221          incl    %edx
2218 2222          cmpb    $0, %al
2219 2223          je      copystr_null            /* null char */
2220 2224          cmpl    $0, %ecx
2221 2225          jne     copystr_loop
2222 2226  
2223 2227  copystr_enametoolong:
2224 2228          movl    $ENAMETOOLONG, %eax
2225 2229          jmp     copystr_out
[ 4 lines elided ]
2230 2234  copystr_out:
2231 2235          cmpl    $0, ARG_LENCOPIED(%ebp) /* want length? */
2232 2236          je      copystr_done            /* no */
2233 2237          movl    ARG_MAXLEN(%ebp), %edx
2234 2238          subl    %ecx, %edx              /* compute length and store it */
2235 2239          movl    ARG_LENCOPIED(%ebp), %ecx
2236 2240          movl    %edx, (%ecx)
2237 2241  
2238 2242  copystr_done:
2239 2243          popl    %edi
2240      -        movl    %gs:CPU_THREAD, %ebx    
     2244 +        movl    %gs:CPU_THREAD, %ebx
2241 2245          movl    %edi, T_LOFAULT(%ebx)   /* restore the original lofault */
2242 2246  
2243 2247          popl    %edi
2244 2248          popl    %ebx
2245 2249          popl    %ebp
2246      -        ret     
     2250 +        ret
2247 2251          SET_SIZE(copystr)
2248 2252  
2249 2253  #undef  ARG_FROM
2250 2254  #undef  ARG_TO
2251 2255  #undef  ARG_MAXLEN
2252 2256  #undef  ARG_LENCOPIED
2253 2257  
2254 2258  #endif  /* __i386 */
2255 2259  #endif  /* __lint */
2256 2260  
[ 60 lines elided ]
2317 2321          jz      2f
2318 2322  
2319 2323          /*
2320 2324           * reload args for the copyop
2321 2325           */
2322 2326          movq    (%rsp), %rdi
2323 2327          movq    0x8(%rsp), %rsi
2324 2328          movq    0x10(%rsp), %rdx
2325 2329          movq    0x18(%rsp), %rcx
2326 2330          leave
2327      -        jmp     *CP_COPYINSTR(%rax)
2328      -        
     2331 +        movq    CP_COPYINSTR(%rax), %rax
     2332 +        INDIRECT_JMP_REG(rax)
     2333 +
2329 2334  2:      movl    $EFAULT, %eax           /* return EFAULT */
2330 2335          leave
2331 2336          ret
2332 2337          SET_SIZE(copyinstr)
2333 2338  
2334 2339  #elif defined(__i386)
2335 2340  
2336 2341  #define ARG_UADDR       4
2337 2342  #define ARG_KADDR       8
2338 2343  
[ 9 lines elided ]
2348 2353  1:
2349 2354  #endif
2350 2355          lea     _copyinstr_error, %eax
2351 2356          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2352 2357          jb      do_copystr
2353 2358          movl    %gs:CPU_THREAD, %edx
2354 2359          jmp     3f
2355 2360  
2356 2361  _copyinstr_error:
2357 2362          popl    %edi
2358      -        movl    %gs:CPU_THREAD, %edx    
     2363 +        movl    %gs:CPU_THREAD, %edx
2359 2364          movl    %edi, T_LOFAULT(%edx)   /* original lofault */
2360 2365  
2361 2366          popl    %edi
2362 2367          popl    %ebx
2363 2368          popl    %ebp
2364 2369  3:
2365 2370          movl    T_COPYOPS(%edx), %eax
2366 2371          cmpl    $0, %eax
2367 2372          jz      2f
2368 2373          jmp     *CP_COPYINSTR(%eax)
2369      -        
     2374 +
2370 2375  2:      movl    $EFAULT, %eax           /* return EFAULT */
2371 2376          ret
2372 2377          SET_SIZE(copyinstr)
2373 2378  
2374 2379  #undef  ARG_UADDR
2375 2380  #undef  ARG_KADDR
2376 2381  
2377 2382  #endif  /* __i386 */
2378 2383  #endif  /* __lint */
2379 2384  
[ 59 lines elided ]
2439 2444          jz      2f
2440 2445  
2441 2446          /*
2442 2447           * reload args for the copyop
2443 2448           */
2444 2449          movq    (%rsp), %rdi
2445 2450          movq    0x8(%rsp), %rsi
2446 2451          movq    0x10(%rsp), %rdx
2447 2452          movq    0x18(%rsp), %rcx
2448 2453          leave
2449      -        jmp     *CP_COPYOUTSTR(%rax)
2450      -        
     2454 +        movq    CP_COPYOUTSTR(%rax), %rax
     2455 +        INDIRECT_JMP_REG(rax)
     2456 +
2451 2457  2:      movl    $EFAULT, %eax           /* return EFAULT */
2452 2458          leave
2453 2459          ret
2454      -        SET_SIZE(copyoutstr)    
2455      -        
     2460 +        SET_SIZE(copyoutstr)
     2461 +
2456 2462  #elif defined(__i386)
2457 2463  
2458 2464  #define ARG_KADDR       4
2459 2465  #define ARG_UADDR       8
2460 2466  
2461 2467          ENTRY(copyoutstr)
2462 2468          movl    kernelbase, %ecx
2463 2469  #ifdef DEBUG
2464 2470          cmpl    %ecx, ARG_KADDR(%esp)
2465 2471          jnb     1f
[ 4 lines elided ]
2470 2476  1:
2471 2477  #endif
2472 2478          lea     _copyoutstr_error, %eax
2473 2479          cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2474 2480          jb      do_copystr
2475 2481          movl    %gs:CPU_THREAD, %edx
2476 2482          jmp     3f
2477 2483  
2478 2484  _copyoutstr_error:
2479 2485          popl    %edi
2480      -        movl    %gs:CPU_THREAD, %edx    
     2486 +        movl    %gs:CPU_THREAD, %edx
2481 2487          movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
2482 2488  
2483 2489          popl    %edi
2484 2490          popl    %ebx
2485 2491          popl    %ebp
2486 2492  3:
2487 2493          movl    T_COPYOPS(%edx), %eax
2488 2494          cmpl    $0, %eax
2489 2495          jz      2f
2490 2496          jmp     *CP_COPYOUTSTR(%eax)
2491 2497  
2492 2498  2:      movl    $EFAULT, %eax           /* return EFAULT */
2493 2499          ret
2494 2500          SET_SIZE(copyoutstr)
2495      -        
     2501 +
2496 2502  #undef  ARG_KADDR
2497 2503  #undef  ARG_UADDR
2498 2504  
2499 2505  #endif  /* __i386 */
2500 2506  #endif  /* __lint */
2501 2507  
2502 2508  /*
2503 2509   * Since all of the fuword() variants are so similar, we have a macro to spit
2504 2510   * them out.  This allows us to create DTrace-unobservable functions easily.
2505 2511   */
2506      -        
     2512 +
2507 2513  #if defined(__lint)
2508 2514  
2509 2515  #if defined(__amd64)
2510 2516  
2511 2517  /* ARGSUSED */
2512 2518  int
2513 2519  fuword64(const void *addr, uint64_t *dst)
2514 2520  { return (0); }
2515 2521  
2516 2522  #endif
[ 38 lines elided ]
2555 2561          xorl    %eax, %eax;                     \
2556 2562          SMAP_ENABLE_INSTR(EN1)                  \
2557 2563          ret;                                    \
2558 2564  _flt_/**/NAME:                                  \
2559 2565          SMAP_ENABLE_INSTR(EN2)                  \
2560 2566          movq    $0, T_LOFAULT(%r9);             \
2561 2567  1:                                              \
2562 2568          movq    T_COPYOPS(%r9), %rax;           \
2563 2569          cmpq    $0, %rax;                       \
2564 2570          jz      2f;                             \
2565      -        jmp     *COPYOP(%rax);                  \
     2571 +        movq    COPYOP(%rax), %rax;             \
     2572 +        INDIRECT_JMP_REG(rax);                  \
2566 2573  2:                                              \
2567 2574          movl    $-1, %eax;                      \
2568 2575          ret;                                    \
2569 2576          SET_SIZE(NAME)
2570      -        
     2577 +
2571 2578          FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572 2579          FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573 2580          FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574 2581          FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2575 2582  
2576 2583  #elif defined(__i386)
2577 2584  
2578 2585  #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2579 2586          ENTRY(NAME)                             \
2580 2587          movl    %gs:CPU_THREAD, %ecx;           \
[ 83 lines elided ]
2664 2671          xorl    %eax, %eax;                     \
2665 2672          SMAP_ENABLE_INSTR(EN1)                  \
2666 2673          ret;                                    \
2667 2674  _flt_/**/NAME:                                  \
2668 2675          SMAP_ENABLE_INSTR(EN2)                  \
2669 2676          movq    $0, T_LOFAULT(%r9);             \
2670 2677  1:                                              \
2671 2678          movq    T_COPYOPS(%r9), %rax;           \
2672 2679          cmpq    $0, %rax;                       \
2673 2680          jz      3f;                             \
2674      -        jmp     *COPYOP(%rax);                  \
     2681 +        movq    COPYOP(%rax), %rax;             \
     2682 +        INDIRECT_JMP_REG(rax);                  \
2675 2683  3:                                              \
2676 2684          movl    $-1, %eax;                      \
2677 2685          ret;                                    \
2678 2686          SET_SIZE(NAME)
2679 2687  
2680 2688          SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681 2689          SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682 2690          SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683 2691          SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2684 2692  
[ 491 lines elided ]
3176 3184          ret
3177 3185          SET_SIZE(smap_enable)
3178 3186  
3179 3187  #endif /* __amd64 || __i386 */
3180 3188  
3181 3189  #endif /* __lint */
3182 3190  
3183 3191  #ifndef __lint
3184 3192  
3185 3193  .data
3186      -.align  4
     3194 +.align  4
3187 3195  .globl  _smap_enable_patch_count
3188 3196  .type   _smap_enable_patch_count,@object
3189 3197  .size   _smap_enable_patch_count, 4
3190 3198  _smap_enable_patch_count:
3191 3199          .long   SMAP_ENABLE_COUNT
3192 3200  
3193 3201  .globl  _smap_disable_patch_count
3194 3202  .type   _smap_disable_patch_count,@object
3195 3203  .size   _smap_disable_patch_count, 4
3196 3204  _smap_disable_patch_count:
3197 3205          .long SMAP_DISABLE_COUNT
3198 3206  
3199 3207  #endif /* __lint */
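
Taken together, the 64-bit (__amd64) conversions in this file take one of two
shapes, sketched schematically below; CP_COPYIN stands in for whichever copyops
offset a given routine dispatches through, and how INDIRECT_JMP_REG itself
expands (a plain register jump or a retpoline thunk, depending on how the
kernel is built) is not shown in this webrev. The 32-bit (__i386) paths keep
their existing jmp *CP_...(%eax) forms.

        /* Register-indirect dispatch: the target is already in a register. */
        jmpq    *%r10                           /* before */
        INDIRECT_JMP_REG(r10)                   /* after */

        /*
         * Memory-indirect dispatch: the target is loaded into a register
         * first, since the retpoline form needs it there, and then the same
         * macro is used.
         */
        jmp     *CP_COPYIN(%rax)                /* before */
        movq    CP_COPYIN(%rax), %rax           /* after */
        INDIRECT_JMP_REG(rax)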
    