1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright (c) 2017 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #if defined(__lint)
  46 #include <sys/types.h>
  47 #include <sys/systm.h>
  48 #else   /* __lint */
  49 #include "assym.h"
  50 #endif  /* __lint */
  51 
  52 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  53 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  54 /*
  55  * Non-temporal access (NTA) alignment requirement
  56  */
  57 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  58 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  59 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
  60 #define COUNT_ALIGN_MASK        _CONST(COUNT_ALIGN_SIZE-1)
  61 
  62 /*
  63  * Starting with Broadwell, Intel processors provide supervisor mode access
  64  * prevention -- SMAP. SMAP forces the kernel to set certain bits to
  65  * enable access to user pages (AC in rflags, defined as PS_ACHK in
  66  * <sys/psw.h>). One of the challenges is that the implementation of many of
  67  * the userland copy routines directly uses the kernel ones. For example,
  68  * copyin and copyout simply jump to the do_copy_fault label and traditionally
  69  * let that code deal with the return for them. In fact, changing that is a
  70  * can of frame pointers.
  71  *
  72  * Rules and Constraints:
  73  *
  74  * 1. For anything that's not in copy.s, we have it make explicit calls to
  75  * the SMAP-related code; such code is usually in a position where it can.
  76  * This is restricted to the following three places: DTrace, resume() in
  77  * swtch.s and on_fault()/no_fault(). If you want to add it somewhere else,
  78  * we should be thinking twice.
  79  *
  80  * 2. We try to toggle SMAP in the smallest window possible. This means that
  81  * if we take a fault, or need to fall back to a copyop in copyin() or
  82  * copyout() or any other function, we always leave with SMAP enabled (the
  83  * kernel cannot access user pages).
  84  *
  85  * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
  86  * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
  87  * which already takes care of ensuring that SMAP is enabled and disabled. Note
  88  * this means that when under an on_fault()/no_fault() handler, one must not
  89  * call the non-*_noerr() routines.
  90  *
  91  * 4. The first thing we should do after coming out of a lofault handler is
  92  * to call smap_enable() again to ensure that we are safely protected, as
  93  * more often than not, we will have disabled SMAP to get there.
  94  *
  95  * 5. The SMAP functions, smap_enable and smap_disable, may not touch any
  96  * registers beyond those used by the call and ret. These routines may be called
  97  * from arbitrary contexts in copy.s where we have slightly more special ABIs in
  98  * place.
  99  *
 100  * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 101  * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
 102  * smap_disable()). If the number of these is changed, you must update the
 103  * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
 104  *
 105  * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
 106  * no known technical reason preventing it from being enabled.
 107  *
 108  * 8. Generally this .s file is processed by a K&R style cpp. This means that it
 109  * really has a lot of feelings about whitespace. In particular, if you have a
 110  * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
 111  *
 112  * 9. The smap_enable and smap_disable functions should not generally be
 113  * called. They exist only so that DTrace and on_trap() may use them.
 114  *
 115  * 10. In general, the kernel has its own value for rflags that gets used. This
 116  * is maintained in a few different places which vary based on how the thread
 117  * comes into existence and whether it's a user thread. In general, when the
 118  * kernel takes a trap, it always sets the flags to a known state, mainly as
 119  * part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that PS_ACHK is
 120  * cleared for us. In addition, when using the sysenter instruction, we mask
 121  * off PS_ACHK via the AMD_SFMASK MSR. See init_cpu_syscall() for where that
 122  * gets masked off.
 123  */
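/*
 * As an illustrative sketch of the pattern the copy routines below follow
 * (the label and iteration numbers here are placeholders, not new interfaces):
 * an inline consumer of SMAP disables protection immediately before touching
 * user memory and re-enables it on every way out, the success path and the
 * lofault error path alike:
 *
 *      SMAP_DISABLE_INSTR(n)           allow user access
 *      <copy to/from user memory>
 *      SMAP_ENABLE_INSTR(m)            success path: protection back on
 * _copy_err:
 *      SMAP_ENABLE_INSTR(m+1)          fault path: protection back on
 */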
 124 
 125 /*
 126  * The optimal 64-bit bcopy and kcopy for modern x86 processors use
 127  * "rep smovq" for large sizes. Performance data shows that many calls to
 128  * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance at
 129  * these small sizes, unrolled code is used. For medium sizes, loops writing
 130  * 64 bytes per iteration are used. Transition points were determined experimentally.
 131  */
 132 #define BZERO_USE_REP   (1024)
 133 #define BCOPY_DFLT_REP  (128)
 134 #define BCOPY_NHM_REP   (768)
 135 
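/*
 * A rough sketch of the resulting dispatch (the thresholds are the tunables
 * above; bcopy's rep threshold is patched at boot between the DFLT and NHM
 * values):
 *
 *      count < 80              unrolled moves via a jump table
 *      count < rep threshold   aligned loop, 64 bytes per iteration
 *      otherwise               rep smovq / rep sstoq plus an unrolled remainder
 */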
 136 /*
 137  * Copy a block of storage, returning an error code if `from' or
 138  * `to' takes a kernel pagefault which cannot be resolved.
 139  * Returns an errno value on a pagefault error, 0 if all ok.
 140  */
 141 
 142 /*
 143  * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 144  * additional call instructions.
 145  */
 146 #if defined(__amd64)
 147 #define SMAP_DISABLE_COUNT      16
 148 #define SMAP_ENABLE_COUNT       26
 149 #elif defined(__i386)
 150 #define SMAP_DISABLE_COUNT      0
 151 #define SMAP_ENABLE_COUNT       0
 152 #endif
 153 
 154 #define SMAP_DISABLE_INSTR(ITER)                \
 155         .globl  _smap_disable_patch_/**/ITER;   \
 156         _smap_disable_patch_/**/ITER/**/:;      \
 157         nop; nop; nop;
 158 
 159 #define SMAP_ENABLE_INSTR(ITER)                 \
 160         .globl  _smap_enable_patch_/**/ITER;    \
 161         _smap_enable_patch_/**/ITER/**/:;       \
 162         nop; nop; nop;
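
/*
 * Each use of these macros emits a named three-byte run of nops. Three bytes
 * is exactly the length of the clac/stac encodings, so boot code can patch
 * the real instructions over the placeholders (found via the
 * _smap_disable_patch_N/_smap_enable_patch_N labels) once SMAP support is
 * detected. This is also why the SMAP_*_COUNT constants above must track the
 * number of macro uses exactly.
 */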
 163 
 164 #if defined(__lint)
 165 
 166 /* ARGSUSED */
 167 int
 168 kcopy(const void *from, void *to, size_t count)
 169 { return (0); }
 170 
 171 #else   /* __lint */
 172 
 173         .globl  kernelbase
 174         .globl  postbootkernelbase
 175 
 176 #if defined(__amd64)
 177 
 178         ENTRY(kcopy)
 179         pushq   %rbp
 180         movq    %rsp, %rbp
 181 #ifdef DEBUG
 182         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 183         jb      0f
 184         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 185         jnb     1f
 186 0:      leaq    .kcopy_panic_msg(%rip), %rdi
 187         xorl    %eax, %eax
 188         call    panic
 189 1:
 190 #endif
 191         /*
 192          * pass lofault value as 4th argument to do_copy_fault
 193          */
 194         leaq    _kcopy_copyerr(%rip), %rcx
 195         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 196 
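        /*
         * do_copy_fault is a shared tail: copyin and copyout jump here as
         * well. On entry %rcx holds the lofault handler to install in the
         * thread and %r9 the thread pointer. If the copy faults, the trap
         * handler puts an errno in %rax and resumes at that handler, which
         * restores the lofault value saved in %r11 and returns.
         */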
 197 do_copy_fault:
 198         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 199         movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 200         call    bcopy_altentry
 201         xorl    %eax, %eax              /* return 0 (success) */
 202         SMAP_ENABLE_INSTR(0)
 203 
 204         /*
 205          * A fault during do_copy_fault is indicated through an errno value
 206          * in %rax and we iretq from the trap handler to here.
 207          */
 208 _kcopy_copyerr:
 209         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 210         leave
 211         ret
 212         SET_SIZE(kcopy)
 213 
 214 #elif defined(__i386)
 215 
 216 #define ARG_FROM        8
 217 #define ARG_TO          12
 218 #define ARG_COUNT       16
 219 
 220         ENTRY(kcopy)
 221 #ifdef DEBUG
 222         pushl   %ebp
 223         movl    %esp, %ebp
 224         movl    postbootkernelbase, %eax
 225         cmpl    %eax, ARG_FROM(%ebp)
 226         jb      0f
 227         cmpl    %eax, ARG_TO(%ebp)
 228         jnb     1f
 229 0:      pushl   $.kcopy_panic_msg
 230         call    panic
 231 1:      popl    %ebp
 232 #endif
 233         lea     _kcopy_copyerr, %eax    /* lofault value */
 234         movl    %gs:CPU_THREAD, %edx    
 235 
 236 do_copy_fault:
 237         pushl   %ebp
 238         movl    %esp, %ebp              /* setup stack frame */
 239         pushl   %esi
 240         pushl   %edi                    /* save registers */
 241 
 242         movl    T_LOFAULT(%edx), %edi
 243         pushl   %edi                    /* save the current lofault */
 244         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 245 
 246         movl    ARG_COUNT(%ebp), %ecx
 247         movl    ARG_FROM(%ebp), %esi
 248         movl    ARG_TO(%ebp), %edi
 249         shrl    $2, %ecx                /* word count */
 250         rep
 251           smovl
 252         movl    ARG_COUNT(%ebp), %ecx
 253         andl    $3, %ecx                /* bytes left over */
 254         rep
 255           smovb
 256         xorl    %eax, %eax
 257 
 258         /*
 259          * A fault during do_copy_fault is indicated through an errno value
 260          * in %eax and we iret from the trap handler to here.
 261          */
 262 _kcopy_copyerr:
 263         popl    %ecx
 264         popl    %edi
 265         movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 266         popl    %esi
 267         popl    %ebp
 268         ret
 269         SET_SIZE(kcopy)
 270 
 271 #undef  ARG_FROM
 272 #undef  ARG_TO
 273 #undef  ARG_COUNT
 274 
 275 #endif  /* __i386 */
 276 #endif  /* __lint */
 277 
 278 #if defined(__lint)
 279 
 280 /*
 281  * Copy a block of storage.  Similar to kcopy but uses non-temporal
 282  * instructions.
 283  */
 284 
 285 /* ARGSUSED */
 286 int
 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
 288 { return (0); }
 289 
 290 #else   /* __lint */
 291 
 292 #if defined(__amd64)
 293 
 294 #define COPY_LOOP_INIT(src, dst, cnt)   \
 295         addq    cnt, src;                       \
 296         addq    cnt, dst;                       \
 297         shrq    $3, cnt;                        \
 298         neg     cnt
 299 
 300         /* Copy 16 bytes per loop.  Uses %rax and %r8 */
 301 #define COPY_LOOP_BODY(src, dst, cnt)   \
 302         prefetchnta     0x100(src, cnt, 8);     \
 303         movq    (src, cnt, 8), %rax;            \
 304         movq    0x8(src, cnt, 8), %r8;          \
 305         movnti  %rax, (dst, cnt, 8);            \
 306         movnti  %r8, 0x8(dst, cnt, 8);          \
 307         addq    $2, cnt
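
        /*
         * COPY_LOOP_INIT advances src and dst to the end of the buffer and
         * turns cnt into a negative count of 8-byte words, so COPY_LOOP_BODY
         * indexes backward from the end. The addq $2 walks cnt up toward zero
         * and sets ZF on the final iteration, which is what the jnz following
         * each use of the macro tests.
         */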
 308 
 309         ENTRY(kcopy_nta)
 310         pushq   %rbp
 311         movq    %rsp, %rbp
 312 #ifdef DEBUG
 313         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 314         jb      0f
 315         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 316         jnb     1f
 317 0:      leaq    .kcopy_panic_msg(%rip), %rdi
 318         xorl    %eax, %eax
 319         call    panic
 320 1:
 321 #endif
 322 
 323         movq    %gs:CPU_THREAD, %r9
 324         cmpq    $0, %rcx                /* No non-temporal access? */
 325         /*
 326          * pass lofault value as 4th argument to do_copy_fault
 327          */
 328         leaq    _kcopy_nta_copyerr(%rip), %rcx  /* doesn't set rflags */
 329         jnz     do_copy_fault           /* use regular access */
 330         /*
 331          * Make sure cnt is >= KCOPY_MIN_SIZE
 332          */
 333         cmpq    $KCOPY_MIN_SIZE, %rdx
 334         jb      do_copy_fault
 335 
 336         /*
 337          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
 338          * count is COUNT_ALIGN_SIZE aligned.
 339          */
 340         movq    %rdi, %r10
 341         orq     %rsi, %r10
 342         andq    $NTA_ALIGN_MASK, %r10
 343         orq     %rdx, %r10
 344         andq    $COUNT_ALIGN_MASK, %r10
 345         jnz     do_copy_fault
 346 
 347         ALTENTRY(do_copy_fault_nta)
 348         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 349         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 350         movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 351 
 352         /*
 353          * COPY_LOOP_BODY uses %rax and %r8
 354          */
 355         COPY_LOOP_INIT(%rdi, %rsi, %rdx)
 356 2:      COPY_LOOP_BODY(%rdi, %rsi, %rdx)
 357         jnz     2b
 358 
 359         mfence
 360         xorl    %eax, %eax              /* return 0 (success) */
 361         SMAP_ENABLE_INSTR(1)
 362 
 363 _kcopy_nta_copyerr:
 364         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 365         leave
 366         ret
 367         SET_SIZE(do_copy_fault_nta)
 368         SET_SIZE(kcopy_nta)
 369 
 370 #elif defined(__i386)
 371 
 372 #define ARG_FROM        8
 373 #define ARG_TO          12
 374 #define ARG_COUNT       16
 375 
 376 #define COPY_LOOP_INIT(src, dst, cnt)   \
 377         addl    cnt, src;                       \
 378         addl    cnt, dst;                       \
 379         shrl    $3, cnt;                        \
 380         neg     cnt
 381 
 382 #define COPY_LOOP_BODY(src, dst, cnt)   \
 383         prefetchnta     0x100(src, cnt, 8);     \
 384         movl    (src, cnt, 8), %esi;            \
 385         movnti  %esi, (dst, cnt, 8);            \
 386         movl    0x4(src, cnt, 8), %esi;         \
 387         movnti  %esi, 0x4(dst, cnt, 8);         \
 388         movl    0x8(src, cnt, 8), %esi;         \
 389         movnti  %esi, 0x8(dst, cnt, 8);         \
 390         movl    0xc(src, cnt, 8), %esi;         \
 391         movnti  %esi, 0xc(dst, cnt, 8);         \
 392         addl    $2, cnt
 393 
 394         /*
 395          * kcopy_nta is not implemented for 32-bit as no performance
 396          * improvement was shown.  We simply jump directly to kcopy
 397          * and discard the 4 arguments.
 398          */
 399         ENTRY(kcopy_nta)
 400         jmp     kcopy
 401 
 402         lea     _kcopy_nta_copyerr, %eax        /* lofault value */
 403         ALTENTRY(do_copy_fault_nta)
 404         pushl   %ebp
 405         movl    %esp, %ebp              /* setup stack frame */
 406         pushl   %esi
 407         pushl   %edi
 408 
 409         movl    %gs:CPU_THREAD, %edx    
 410         movl    T_LOFAULT(%edx), %edi
 411         pushl   %edi                    /* save the current lofault */
 412         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 413 
 414         /* COPY_LOOP_BODY needs to use %esi */
 415         movl    ARG_COUNT(%ebp), %ecx
 416         movl    ARG_FROM(%ebp), %edi
 417         movl    ARG_TO(%ebp), %eax
 418         COPY_LOOP_INIT(%edi, %eax, %ecx)
 419 1:      COPY_LOOP_BODY(%edi, %eax, %ecx)
 420         jnz     1b
 421         mfence
 422 
 423         xorl    %eax, %eax
 424 _kcopy_nta_copyerr:
 425         popl    %ecx
 426         popl    %edi
 427         movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 428         popl    %esi
 429         leave
 430         ret
 431         SET_SIZE(do_copy_fault_nta)
 432         SET_SIZE(kcopy_nta)
 433 
 434 #undef  ARG_FROM
 435 #undef  ARG_TO
 436 #undef  ARG_COUNT
 437 
 438 #endif  /* __i386 */
 439 #endif  /* __lint */
 440 
 441 #if defined(__lint)
 442 
 443 /* ARGSUSED */
 444 void
 445 bcopy(const void *from, void *to, size_t count)
 446 {}
 447 
 448 #else   /* __lint */
 449 
 450 #if defined(__amd64)
 451 
 452         ENTRY(bcopy)
 453 #ifdef DEBUG
 454         orq     %rdx, %rdx              /* %rdx = count */
 455         jz      1f
 456         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 457         jb      0f
 458         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */         
 459         jnb     1f
 460 0:      leaq    .bcopy_panic_msg(%rip), %rdi
 461         jmp     call_panic              /* setup stack and call panic */
 462 1:
 463 #endif
 464         /*
 465          * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466          * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467          * uses these registers in the future, they must be saved and restored.
 468          */
 469         ALTENTRY(bcopy_altentry)
 470 do_copy:
 471 #define L(s) .bcopy/**/s
 472         cmpq    $0x50, %rdx             /* 80 */
 473         jae     bcopy_ck_size
 474 
 475         /*
 476          * Performance data shows many callers copy small buffers. So for
 477          * best performance at these sizes, unrolled code is used. Store data
 478          * without worrying about alignment.
 479          */
 480         leaq    L(fwdPxQx)(%rip), %r10
 481         addq    %rdx, %rdi
 482         addq    %rdx, %rsi
 483         movslq  (%r10,%rdx,4), %rcx
 484         leaq    (%rcx,%r10,1), %r10
 485         jmpq    *%r10
 486 
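        /*
         * L(fwdPxQx) is a table of 32-bit offsets, indexed by the byte count
         * (0-79), from the table base to the unrolled fragment that copies
         * exactly that many bytes. Since %rdi and %rsi were advanced past the
         * end of the buffers above, each fragment works backward with negative
         * offsets and falls through into the next smaller fragment.
         */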
 487         .p2align 4
 488 L(fwdPxQx):
 489         .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490         .int       L(P1Q0)-L(fwdPxQx)
 491         .int       L(P2Q0)-L(fwdPxQx)
 492         .int       L(P3Q0)-L(fwdPxQx)
 493         .int       L(P4Q0)-L(fwdPxQx)
 494         .int       L(P5Q0)-L(fwdPxQx)
 495         .int       L(P6Q0)-L(fwdPxQx)
 496         .int       L(P7Q0)-L(fwdPxQx) 
 497 
 498         .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499         .int       L(P1Q1)-L(fwdPxQx)
 500         .int       L(P2Q1)-L(fwdPxQx)
 501         .int       L(P3Q1)-L(fwdPxQx)
 502         .int       L(P4Q1)-L(fwdPxQx)
 503         .int       L(P5Q1)-L(fwdPxQx)
 504         .int       L(P6Q1)-L(fwdPxQx)
 505         .int       L(P7Q1)-L(fwdPxQx) 
 506 
 507         .int       L(P0Q2)-L(fwdPxQx)   /* 16 */
 508         .int       L(P1Q2)-L(fwdPxQx)
 509         .int       L(P2Q2)-L(fwdPxQx)
 510         .int       L(P3Q2)-L(fwdPxQx)
 511         .int       L(P4Q2)-L(fwdPxQx)
 512         .int       L(P5Q2)-L(fwdPxQx)
 513         .int       L(P6Q2)-L(fwdPxQx)
 514         .int       L(P7Q2)-L(fwdPxQx) 
 515 
 516         .int       L(P0Q3)-L(fwdPxQx)   /* 24 */
 517         .int       L(P1Q3)-L(fwdPxQx)
 518         .int       L(P2Q3)-L(fwdPxQx)
 519         .int       L(P3Q3)-L(fwdPxQx)
 520         .int       L(P4Q3)-L(fwdPxQx)
 521         .int       L(P5Q3)-L(fwdPxQx)
 522         .int       L(P6Q3)-L(fwdPxQx)
 523         .int       L(P7Q3)-L(fwdPxQx) 
 524 
 525         .int       L(P0Q4)-L(fwdPxQx)   /* 32 */
 526         .int       L(P1Q4)-L(fwdPxQx)
 527         .int       L(P2Q4)-L(fwdPxQx)
 528         .int       L(P3Q4)-L(fwdPxQx)
 529         .int       L(P4Q4)-L(fwdPxQx)
 530         .int       L(P5Q4)-L(fwdPxQx)
 531         .int       L(P6Q4)-L(fwdPxQx)
 532         .int       L(P7Q4)-L(fwdPxQx) 
 533 
 534         .int       L(P0Q5)-L(fwdPxQx)   /* 40 */
 535         .int       L(P1Q5)-L(fwdPxQx)
 536         .int       L(P2Q5)-L(fwdPxQx)
 537         .int       L(P3Q5)-L(fwdPxQx)
 538         .int       L(P4Q5)-L(fwdPxQx)
 539         .int       L(P5Q5)-L(fwdPxQx)
 540         .int       L(P6Q5)-L(fwdPxQx)
 541         .int       L(P7Q5)-L(fwdPxQx) 
 542 
 543         .int       L(P0Q6)-L(fwdPxQx)   /* 48 */
 544         .int       L(P1Q6)-L(fwdPxQx)
 545         .int       L(P2Q6)-L(fwdPxQx)
 546         .int       L(P3Q6)-L(fwdPxQx)
 547         .int       L(P4Q6)-L(fwdPxQx)
 548         .int       L(P5Q6)-L(fwdPxQx)
 549         .int       L(P6Q6)-L(fwdPxQx)
 550         .int       L(P7Q6)-L(fwdPxQx) 
 551 
 552         .int       L(P0Q7)-L(fwdPxQx)   /* 56 */
 553         .int       L(P1Q7)-L(fwdPxQx)
 554         .int       L(P2Q7)-L(fwdPxQx)
 555         .int       L(P3Q7)-L(fwdPxQx)
 556         .int       L(P4Q7)-L(fwdPxQx)
 557         .int       L(P5Q7)-L(fwdPxQx)
 558         .int       L(P6Q7)-L(fwdPxQx)
 559         .int       L(P7Q7)-L(fwdPxQx) 
 560 
 561         .int       L(P0Q8)-L(fwdPxQx)   /* 64 */
 562         .int       L(P1Q8)-L(fwdPxQx)
 563         .int       L(P2Q8)-L(fwdPxQx)
 564         .int       L(P3Q8)-L(fwdPxQx)
 565         .int       L(P4Q8)-L(fwdPxQx)
 566         .int       L(P5Q8)-L(fwdPxQx)
 567         .int       L(P6Q8)-L(fwdPxQx)
 568         .int       L(P7Q8)-L(fwdPxQx)
 569 
 570         .int       L(P0Q9)-L(fwdPxQx)   /* 72 */
 571         .int       L(P1Q9)-L(fwdPxQx)
 572         .int       L(P2Q9)-L(fwdPxQx)
 573         .int       L(P3Q9)-L(fwdPxQx)
 574         .int       L(P4Q9)-L(fwdPxQx)
 575         .int       L(P5Q9)-L(fwdPxQx)
 576         .int       L(P6Q9)-L(fwdPxQx)
 577         .int       L(P7Q9)-L(fwdPxQx)   /* 79 */
 578 
 579         .p2align 4
 580 L(P0Q9):
 581         mov    -0x48(%rdi), %rcx
 582         mov    %rcx, -0x48(%rsi)
 583 L(P0Q8):
 584         mov    -0x40(%rdi), %r10
 585         mov    %r10, -0x40(%rsi)
 586 L(P0Q7):
 587         mov    -0x38(%rdi), %r8
 588         mov    %r8, -0x38(%rsi)
 589 L(P0Q6):
 590         mov    -0x30(%rdi), %rcx
 591         mov    %rcx, -0x30(%rsi)
 592 L(P0Q5):
 593         mov    -0x28(%rdi), %r10
 594         mov    %r10, -0x28(%rsi)
 595 L(P0Q4):
 596         mov    -0x20(%rdi), %r8
 597         mov    %r8, -0x20(%rsi)
 598 L(P0Q3):
 599         mov    -0x18(%rdi), %rcx
 600         mov    %rcx, -0x18(%rsi)
 601 L(P0Q2):
 602         mov    -0x10(%rdi), %r10
 603         mov    %r10, -0x10(%rsi)
 604 L(P0Q1):
 605         mov    -0x8(%rdi), %r8
 606         mov    %r8, -0x8(%rsi)
 607 L(P0Q0):                                   
 608         ret   
 609 
 610         .p2align 4
 611 L(P1Q9):
 612         mov    -0x49(%rdi), %r8
 613         mov    %r8, -0x49(%rsi)
 614 L(P1Q8):
 615         mov    -0x41(%rdi), %rcx
 616         mov    %rcx, -0x41(%rsi)
 617 L(P1Q7):
 618         mov    -0x39(%rdi), %r10
 619         mov    %r10, -0x39(%rsi)
 620 L(P1Q6):
 621         mov    -0x31(%rdi), %r8
 622         mov    %r8, -0x31(%rsi)
 623 L(P1Q5):
 624         mov    -0x29(%rdi), %rcx
 625         mov    %rcx, -0x29(%rsi)
 626 L(P1Q4):
 627         mov    -0x21(%rdi), %r10
 628         mov    %r10, -0x21(%rsi)
 629 L(P1Q3):
 630         mov    -0x19(%rdi), %r8
 631         mov    %r8, -0x19(%rsi)
 632 L(P1Q2):
 633         mov    -0x11(%rdi), %rcx
 634         mov    %rcx, -0x11(%rsi)
 635 L(P1Q1):
 636         mov    -0x9(%rdi), %r10
 637         mov    %r10, -0x9(%rsi)
 638 L(P1Q0):
 639         movzbq -0x1(%rdi), %r8
 640         mov    %r8b, -0x1(%rsi)
 641         ret   
 642 
 643         .p2align 4
 644 L(P2Q9):
 645         mov    -0x4a(%rdi), %r8
 646         mov    %r8, -0x4a(%rsi)
 647 L(P2Q8):
 648         mov    -0x42(%rdi), %rcx
 649         mov    %rcx, -0x42(%rsi)
 650 L(P2Q7):
 651         mov    -0x3a(%rdi), %r10
 652         mov    %r10, -0x3a(%rsi)
 653 L(P2Q6):
 654         mov    -0x32(%rdi), %r8
 655         mov    %r8, -0x32(%rsi)
 656 L(P2Q5):
 657         mov    -0x2a(%rdi), %rcx
 658         mov    %rcx, -0x2a(%rsi)
 659 L(P2Q4):
 660         mov    -0x22(%rdi), %r10
 661         mov    %r10, -0x22(%rsi)
 662 L(P2Q3):
 663         mov    -0x1a(%rdi), %r8
 664         mov    %r8, -0x1a(%rsi)
 665 L(P2Q2):
 666         mov    -0x12(%rdi), %rcx
 667         mov    %rcx, -0x12(%rsi)
 668 L(P2Q1):
 669         mov    -0xa(%rdi), %r10
 670         mov    %r10, -0xa(%rsi)
 671 L(P2Q0):
 672         movzwq -0x2(%rdi), %r8
 673         mov    %r8w, -0x2(%rsi)
 674         ret   
 675 
 676         .p2align 4
 677 L(P3Q9):
 678         mov    -0x4b(%rdi), %r8
 679         mov    %r8, -0x4b(%rsi)
 680 L(P3Q8):
 681         mov    -0x43(%rdi), %rcx
 682         mov    %rcx, -0x43(%rsi)
 683 L(P3Q7):
 684         mov    -0x3b(%rdi), %r10
 685         mov    %r10, -0x3b(%rsi)
 686 L(P3Q6):
 687         mov    -0x33(%rdi), %r8
 688         mov    %r8, -0x33(%rsi)
 689 L(P3Q5):
 690         mov    -0x2b(%rdi), %rcx
 691         mov    %rcx, -0x2b(%rsi)
 692 L(P3Q4):
 693         mov    -0x23(%rdi), %r10
 694         mov    %r10, -0x23(%rsi)
 695 L(P3Q3):
 696         mov    -0x1b(%rdi), %r8
 697         mov    %r8, -0x1b(%rsi)
 698 L(P3Q2):
 699         mov    -0x13(%rdi), %rcx
 700         mov    %rcx, -0x13(%rsi)
 701 L(P3Q1):
 702         mov    -0xb(%rdi), %r10
 703         mov    %r10, -0xb(%rsi)
 704         /*
 705          * These trailing loads/stores have to do all their loads first,
 706          * then do the stores.
 707          */
 708 L(P3Q0):
 709         movzwq -0x3(%rdi), %r8
 710         movzbq -0x1(%rdi), %r10
 711         mov    %r8w, -0x3(%rsi)
 712         mov    %r10b, -0x1(%rsi)
 713         ret   
 714 
 715         .p2align 4
 716 L(P4Q9):
 717         mov    -0x4c(%rdi), %r8
 718         mov    %r8, -0x4c(%rsi)
 719 L(P4Q8):
 720         mov    -0x44(%rdi), %rcx
 721         mov    %rcx, -0x44(%rsi)
 722 L(P4Q7):
 723         mov    -0x3c(%rdi), %r10
 724         mov    %r10, -0x3c(%rsi)
 725 L(P4Q6):
 726         mov    -0x34(%rdi), %r8
 727         mov    %r8, -0x34(%rsi)
 728 L(P4Q5):
 729         mov    -0x2c(%rdi), %rcx
 730         mov    %rcx, -0x2c(%rsi)
 731 L(P4Q4):
 732         mov    -0x24(%rdi), %r10
 733         mov    %r10, -0x24(%rsi)
 734 L(P4Q3):
 735         mov    -0x1c(%rdi), %r8
 736         mov    %r8, -0x1c(%rsi)
 737 L(P4Q2):
 738         mov    -0x14(%rdi), %rcx
 739         mov    %rcx, -0x14(%rsi)
 740 L(P4Q1):
 741         mov    -0xc(%rdi), %r10
 742         mov    %r10, -0xc(%rsi)
 743 L(P4Q0):
 744         mov    -0x4(%rdi), %r8d
 745         mov    %r8d, -0x4(%rsi)
 746         ret   
 747 
 748         .p2align 4
 749 L(P5Q9):
 750         mov    -0x4d(%rdi), %r8
 751         mov    %r8, -0x4d(%rsi)
 752 L(P5Q8):
 753         mov    -0x45(%rdi), %rcx
 754         mov    %rcx, -0x45(%rsi)
 755 L(P5Q7):
 756         mov    -0x3d(%rdi), %r10
 757         mov    %r10, -0x3d(%rsi)
 758 L(P5Q6):
 759         mov    -0x35(%rdi), %r8
 760         mov    %r8, -0x35(%rsi)
 761 L(P5Q5):
 762         mov    -0x2d(%rdi), %rcx
 763         mov    %rcx, -0x2d(%rsi)
 764 L(P5Q4):
 765         mov    -0x25(%rdi), %r10
 766         mov    %r10, -0x25(%rsi)
 767 L(P5Q3):
 768         mov    -0x1d(%rdi), %r8
 769         mov    %r8, -0x1d(%rsi)
 770 L(P5Q2):
 771         mov    -0x15(%rdi), %rcx
 772         mov    %rcx, -0x15(%rsi)
 773 L(P5Q1):
 774         mov    -0xd(%rdi), %r10
 775         mov    %r10, -0xd(%rsi)
 776 L(P5Q0):
 777         mov    -0x5(%rdi), %r8d
 778         movzbq -0x1(%rdi), %r10
 779         mov    %r8d, -0x5(%rsi)
 780         mov    %r10b, -0x1(%rsi)
 781         ret   
 782 
 783         .p2align 4
 784 L(P6Q9):
 785         mov    -0x4e(%rdi), %r8
 786         mov    %r8, -0x4e(%rsi)
 787 L(P6Q8):
 788         mov    -0x46(%rdi), %rcx
 789         mov    %rcx, -0x46(%rsi)
 790 L(P6Q7):
 791         mov    -0x3e(%rdi), %r10
 792         mov    %r10, -0x3e(%rsi)
 793 L(P6Q6):
 794         mov    -0x36(%rdi), %r8
 795         mov    %r8, -0x36(%rsi)
 796 L(P6Q5):
 797         mov    -0x2e(%rdi), %rcx
 798         mov    %rcx, -0x2e(%rsi)
 799 L(P6Q4):
 800         mov    -0x26(%rdi), %r10
 801         mov    %r10, -0x26(%rsi)
 802 L(P6Q3):
 803         mov    -0x1e(%rdi), %r8
 804         mov    %r8, -0x1e(%rsi)
 805 L(P6Q2):
 806         mov    -0x16(%rdi), %rcx
 807         mov    %rcx, -0x16(%rsi)
 808 L(P6Q1):
 809         mov    -0xe(%rdi), %r10
 810         mov    %r10, -0xe(%rsi)
 811 L(P6Q0):
 812         mov    -0x6(%rdi), %r8d
 813         movzwq -0x2(%rdi), %r10
 814         mov    %r8d, -0x6(%rsi)
 815         mov    %r10w, -0x2(%rsi)
 816         ret   
 817 
 818         .p2align 4
 819 L(P7Q9):
 820         mov    -0x4f(%rdi), %r8
 821         mov    %r8, -0x4f(%rsi)
 822 L(P7Q8):
 823         mov    -0x47(%rdi), %rcx
 824         mov    %rcx, -0x47(%rsi)
 825 L(P7Q7):
 826         mov    -0x3f(%rdi), %r10
 827         mov    %r10, -0x3f(%rsi)
 828 L(P7Q6):
 829         mov    -0x37(%rdi), %r8
 830         mov    %r8, -0x37(%rsi)
 831 L(P7Q5):
 832         mov    -0x2f(%rdi), %rcx
 833         mov    %rcx, -0x2f(%rsi)
 834 L(P7Q4):
 835         mov    -0x27(%rdi), %r10
 836         mov    %r10, -0x27(%rsi)
 837 L(P7Q3):
 838         mov    -0x1f(%rdi), %r8
 839         mov    %r8, -0x1f(%rsi)
 840 L(P7Q2):
 841         mov    -0x17(%rdi), %rcx
 842         mov    %rcx, -0x17(%rsi)
 843 L(P7Q1):
 844         mov    -0xf(%rdi), %r10
 845         mov    %r10, -0xf(%rsi)
 846 L(P7Q0):
 847         mov    -0x7(%rdi), %r8d
 848         movzwq -0x3(%rdi), %r10
 849         movzbq -0x1(%rdi), %rcx
 850         mov    %r8d, -0x7(%rsi)
 851         mov    %r10w, -0x3(%rsi)
 852         mov    %cl, -0x1(%rsi)
 853         ret   
 854 
 855         /*
 856          * For large sizes rep smovq is fastest.
 857          * The transition point was determined experimentally as measured on
 858          * Intel Xeon processors (incl. Nehalem and previous generations) and
 859          * AMD Opteron. The transition value is patched at boot time to avoid
 860          * the cost of a memory reference.
 861          */
 862         .globl bcopy_patch_start
 863 bcopy_patch_start:
 864         cmpq    $BCOPY_NHM_REP, %rdx
 865         .globl bcopy_patch_end
 866 bcopy_patch_end:
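
        /*
         * The single instruction between bcopy_patch_start and bcopy_patch_end
         * is a cmpq against the larger Nehalem threshold; on processors where
         * that threshold wins, the boot-time patch code is expected to copy it
         * over the default cmpq at bcopy_ck_size below.
         */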
 867 
 868         .p2align 4
 869         .globl bcopy_ck_size
 870 bcopy_ck_size:
 871         cmpq    $BCOPY_DFLT_REP, %rdx
 872         jae     L(use_rep)
 873 
 874         /*
 875          * Align to an 8-byte boundary. Avoids penalties from unaligned stores
 876          * as well as from stores spanning cachelines.
 877          */
 878         test    $0x7, %rsi
 879         jz      L(aligned_loop)
 880         test    $0x1, %rsi
 881         jz      2f
 882         movzbq  (%rdi), %r8
 883         dec     %rdx
 884         inc     %rdi
 885         mov     %r8b, (%rsi)
 886         inc     %rsi
 887 2:
 888         test    $0x2, %rsi
 889         jz      4f
 890         movzwq  (%rdi), %r8
 891         sub     $0x2, %rdx
 892         add     $0x2, %rdi
 893         mov     %r8w, (%rsi)
 894         add     $0x2, %rsi
 895 4:
 896         test    $0x4, %rsi
 897         jz      L(aligned_loop)
 898         mov     (%rdi), %r8d
 899         sub     $0x4, %rdx
 900         add     $0x4, %rdi
 901         mov     %r8d, (%rsi)
 902         add     $0x4, %rsi
 903 
 904         /*
 905          * Copy 64 bytes per loop
 906          */
 907         .p2align 4
 908 L(aligned_loop):
 909         mov     (%rdi), %r8
 910         mov     0x8(%rdi), %r10
 911         lea     -0x40(%rdx), %rdx
 912         mov     %r8, (%rsi)
 913         mov     %r10, 0x8(%rsi)
 914         mov     0x10(%rdi), %rcx
 915         mov     0x18(%rdi), %r8
 916         mov     %rcx, 0x10(%rsi)
 917         mov     %r8, 0x18(%rsi)
 918 
 919         cmp     $0x40, %rdx
 920         mov     0x20(%rdi), %r10
 921         mov     0x28(%rdi), %rcx
 922         mov     %r10, 0x20(%rsi)
 923         mov     %rcx, 0x28(%rsi)
 924         mov     0x30(%rdi), %r8
 925         mov     0x38(%rdi), %r10
 926         lea     0x40(%rdi), %rdi
 927         mov     %r8, 0x30(%rsi)
 928         mov     %r10, 0x38(%rsi)
 929         lea     0x40(%rsi), %rsi
 930         jae     L(aligned_loop)
 931 
 932         /*
 933          * Copy remaining bytes (0-63)
 934          */
 935 L(do_remainder):
 936         leaq    L(fwdPxQx)(%rip), %r10
 937         addq    %rdx, %rdi
 938         addq    %rdx, %rsi
 939         movslq  (%r10,%rdx,4), %rcx
 940         leaq    (%rcx,%r10,1), %r10
 941         jmpq    *%r10
 942 
 943         /*
 944          * Use rep smovq. Copy the remainder via unrolled code
 945          */
 946         .p2align 4
 947 L(use_rep):
 948         xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949         movq    %rdx, %rcx              /* %rcx = count */
 950         shrq    $3, %rcx                /* 8-byte word count */
 951         rep
 952           smovq
 953 
 954         xchgq   %rsi, %rdi              /* %rdi = src, %rsi = destination */
 955         andq    $7, %rdx                /* remainder */
 956         jnz     L(do_remainder)
 957         ret
 958 #undef  L
 959 
 960 #ifdef DEBUG
 961         /*
 962          * Setup frame on the run-time stack. The end of the input argument
 963          * area must be aligned on a 16 byte boundary. The stack pointer %rsp
 964          * always points to the end of the latest allocated stack frame.
 965          * panic(const char *format, ...) is a varargs function. When a
 966          * function taking variable arguments is called, %rax must be set
 967          * to eight times the number of floating point parameters passed
 968          * to the function in SSE registers.
 969          */
 970 call_panic:
 971         pushq   %rbp                    /* align stack properly */
 972         movq    %rsp, %rbp
 973         xorl    %eax, %eax              /* no variable arguments */
 974         call    panic                   /* %rdi = format string */
 975 #endif
 976         SET_SIZE(bcopy_altentry)
 977         SET_SIZE(bcopy)
 978 
 979 #elif defined(__i386)
 980 
 981 #define ARG_FROM        4
 982 #define ARG_TO          8
 983 #define ARG_COUNT       12
 984 
 985         ENTRY(bcopy)
 986 #ifdef DEBUG
 987         movl    ARG_COUNT(%esp), %eax
 988         orl     %eax, %eax
 989         jz      1f
 990         movl    postbootkernelbase, %eax
 991         cmpl    %eax, ARG_FROM(%esp)
 992         jb      0f
 993         cmpl    %eax, ARG_TO(%esp)
 994         jnb     1f
 995 0:      pushl   %ebp
 996         movl    %esp, %ebp
 997         pushl   $.bcopy_panic_msg
 998         call    panic
 999 1:
1000 #endif
1001 do_copy:
1002         movl    %esi, %eax              /* save registers */
1003         movl    %edi, %edx
1004         movl    ARG_COUNT(%esp), %ecx
1005         movl    ARG_FROM(%esp), %esi
1006         movl    ARG_TO(%esp), %edi
1007 
1008         shrl    $2, %ecx                /* word count */
1009         rep
1010           smovl
1011         movl    ARG_COUNT(%esp), %ecx
1012         andl    $3, %ecx                /* bytes left over */
1013         rep
1014           smovb
1015         movl    %eax, %esi              /* restore registers */
1016         movl    %edx, %edi
1017         ret
1018         SET_SIZE(bcopy)
1019 
1020 #undef  ARG_COUNT
1021 #undef  ARG_FROM
1022 #undef  ARG_TO
1023 
1024 #endif  /* __i386 */
1025 #endif  /* __lint */
1026 
1027 
1028 /*
1029  * Zero a block of storage, returning an error code if we
1030  * take a kernel pagefault which cannot be resolved.
1031  * Returns an errno value on a pagefault error, 0 if all ok.
1032  */
1033 
1034 #if defined(__lint)
1035 
1036 /* ARGSUSED */
1037 int
1038 kzero(void *addr, size_t count)
1039 { return (0); }
1040 
1041 #else   /* __lint */
1042 
1043 #if defined(__amd64)
1044 
1045         ENTRY(kzero)
1046 #ifdef DEBUG
1047         cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1048         jnb     0f
1049         leaq    .kzero_panic_msg(%rip), %rdi
1050         jmp     call_panic              /* setup stack and call panic */
1051 0:
1052 #endif
1053         /*
1054          * pass lofault value as 3rd argument for fault return 
1055          */
1056         leaq    _kzeroerr(%rip), %rdx
1057 
1058         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
1059         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
1060         movq    %rdx, T_LOFAULT(%r9)    /* new lofault */
1061         call    bzero_altentry
1062         xorl    %eax, %eax
1063         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1064         ret
1065         /*
1066          * A fault during bzero is indicated through an errno value
1067          * in %rax when we iretq to here.
1068          */
1069 _kzeroerr:
1070         addq    $8, %rsp                /* pop bzero_altentry call ret addr */
1071         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1072         ret
1073         SET_SIZE(kzero)
1074 
1075 #elif defined(__i386)
1076 
1077 #define ARG_ADDR        8
1078 #define ARG_COUNT       12
1079 
1080         ENTRY(kzero)
1081 #ifdef DEBUG
1082         pushl   %ebp
1083         movl    %esp, %ebp
1084         movl    postbootkernelbase, %eax
1085         cmpl    %eax, ARG_ADDR(%ebp)
1086         jnb     0f
1087         pushl   $.kzero_panic_msg
1088         call    panic
1089 0:      popl    %ebp
1090 #endif
1091         lea     _kzeroerr, %eax         /* kzeroerr is lofault value */
1092 
1093         pushl   %ebp                    /* save stack base */
1094         movl    %esp, %ebp              /* set new stack base */
1095         pushl   %edi                    /* save %edi */
1096 
1097         mov     %gs:CPU_THREAD, %edx    
1098         movl    T_LOFAULT(%edx), %edi
1099         pushl   %edi                    /* save the current lofault */
1100         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
1101 
1102         movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1103         movl    ARG_ADDR(%ebp), %edi    /* %edi <- address of bytes to clear */
1104         shrl    $2, %ecx                /* Count of double words to zero */
1105         xorl    %eax, %eax              /* sstol val */
1106         rep
1107           sstol                 /* %ecx contains words to clear (%eax=0) */
1108 
1109         movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1110         andl    $3, %ecx                /* do mod 4 */
1111         rep
1112           sstob                 /* %ecx contains residual bytes to clear */
1113 
1114         /*
1115          * A fault during kzero is indicated through an errno value
1116          * in %eax when we iret to here.
1117          */
1118 _kzeroerr:
1119         popl    %edi
1120         movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
1121         popl    %edi
1122         popl    %ebp
1123         ret
1124         SET_SIZE(kzero)
1125 
1126 #undef  ARG_ADDR
1127 #undef  ARG_COUNT
1128 
1129 #endif  /* __i386 */
1130 #endif  /* __lint */
1131 
1132 /*
1133  * Zero a block of storage.
1134  */
1135 
1136 #if defined(__lint)
1137 
1138 /* ARGSUSED */
1139 void
1140 bzero(void *addr, size_t count)
1141 {}
1142 
1143 #else   /* __lint */
1144 
1145 #if defined(__amd64)
1146 
1147         ENTRY(bzero)
1148 #ifdef DEBUG
1149         cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1150         jnb     0f
1151         leaq    .bzero_panic_msg(%rip), %rdi
1152         jmp     call_panic              /* setup stack and call panic */
1153 0:
1154 #endif
1155         ALTENTRY(bzero_altentry)
1156 do_zero:
1157 #define L(s) .bzero/**/s
1158         xorl    %eax, %eax
1159 
1160         cmpq    $0x50, %rsi             /* 80 */
1161         jae     L(ck_align)
1162 
1163         /*
1164          * Performance data shows many callers zero small buffers. So for
1165          * best performance at these sizes, unrolled code is used. Store zeros
1166          * without worrying about alignment.
1167          */
1168         leaq    L(setPxQx)(%rip), %r10
1169         addq    %rsi, %rdi
1170         movslq  (%r10,%rsi,4), %rcx
1171         leaq    (%rcx,%r10,1), %r10
1172         jmpq    *%r10
1173 
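        /*
         * Same computed-jump dispatch as bcopy's L(fwdPxQx) table above, but
         * here the fragments only store %rax (zero) backward from the end of
         * the buffer.
         */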
1174         .p2align 4
1175 L(setPxQx):
1176         .int       L(P0Q0)-L(setPxQx)   /* 0 */
1177         .int       L(P1Q0)-L(setPxQx)
1178         .int       L(P2Q0)-L(setPxQx)
1179         .int       L(P3Q0)-L(setPxQx)
1180         .int       L(P4Q0)-L(setPxQx)
1181         .int       L(P5Q0)-L(setPxQx)
1182         .int       L(P6Q0)-L(setPxQx)
1183         .int       L(P7Q0)-L(setPxQx) 
1184 
1185         .int       L(P0Q1)-L(setPxQx)   /* 8 */
1186         .int       L(P1Q1)-L(setPxQx)
1187         .int       L(P2Q1)-L(setPxQx)
1188         .int       L(P3Q1)-L(setPxQx)
1189         .int       L(P4Q1)-L(setPxQx)
1190         .int       L(P5Q1)-L(setPxQx)
1191         .int       L(P6Q1)-L(setPxQx)
1192         .int       L(P7Q1)-L(setPxQx) 
1193 
1194         .int       L(P0Q2)-L(setPxQx)   /* 16 */
1195         .int       L(P1Q2)-L(setPxQx)
1196         .int       L(P2Q2)-L(setPxQx)
1197         .int       L(P3Q2)-L(setPxQx)
1198         .int       L(P4Q2)-L(setPxQx)
1199         .int       L(P5Q2)-L(setPxQx)
1200         .int       L(P6Q2)-L(setPxQx)
1201         .int       L(P7Q2)-L(setPxQx) 
1202 
1203         .int       L(P0Q3)-L(setPxQx)   /* 24 */
1204         .int       L(P1Q3)-L(setPxQx)
1205         .int       L(P2Q3)-L(setPxQx)
1206         .int       L(P3Q3)-L(setPxQx)
1207         .int       L(P4Q3)-L(setPxQx)
1208         .int       L(P5Q3)-L(setPxQx)
1209         .int       L(P6Q3)-L(setPxQx)
1210         .int       L(P7Q3)-L(setPxQx) 
1211 
1212         .int       L(P0Q4)-L(setPxQx)   /* 32 */
1213         .int       L(P1Q4)-L(setPxQx)
1214         .int       L(P2Q4)-L(setPxQx)
1215         .int       L(P3Q4)-L(setPxQx)
1216         .int       L(P4Q4)-L(setPxQx)
1217         .int       L(P5Q4)-L(setPxQx)
1218         .int       L(P6Q4)-L(setPxQx)
1219         .int       L(P7Q4)-L(setPxQx) 
1220 
1221         .int       L(P0Q5)-L(setPxQx)   /* 40 */
1222         .int       L(P1Q5)-L(setPxQx)
1223         .int       L(P2Q5)-L(setPxQx)
1224         .int       L(P3Q5)-L(setPxQx)
1225         .int       L(P4Q5)-L(setPxQx)
1226         .int       L(P5Q5)-L(setPxQx)
1227         .int       L(P6Q5)-L(setPxQx)
1228         .int       L(P7Q5)-L(setPxQx) 
1229 
1230         .int       L(P0Q6)-L(setPxQx)   /* 48 */
1231         .int       L(P1Q6)-L(setPxQx)
1232         .int       L(P2Q6)-L(setPxQx)
1233         .int       L(P3Q6)-L(setPxQx)
1234         .int       L(P4Q6)-L(setPxQx)
1235         .int       L(P5Q6)-L(setPxQx)
1236         .int       L(P6Q6)-L(setPxQx)
1237         .int       L(P7Q6)-L(setPxQx) 
1238 
1239         .int       L(P0Q7)-L(setPxQx)   /* 56 */
1240         .int       L(P1Q7)-L(setPxQx)
1241         .int       L(P2Q7)-L(setPxQx)
1242         .int       L(P3Q7)-L(setPxQx)
1243         .int       L(P4Q7)-L(setPxQx)
1244         .int       L(P5Q7)-L(setPxQx)
1245         .int       L(P6Q7)-L(setPxQx)
1246         .int       L(P7Q7)-L(setPxQx) 
1247 
1248         .int       L(P0Q8)-L(setPxQx)   /* 64 */
1249         .int       L(P1Q8)-L(setPxQx)
1250         .int       L(P2Q8)-L(setPxQx)
1251         .int       L(P3Q8)-L(setPxQx)
1252         .int       L(P4Q8)-L(setPxQx)
1253         .int       L(P5Q8)-L(setPxQx)
1254         .int       L(P6Q8)-L(setPxQx)
1255         .int       L(P7Q8)-L(setPxQx)
1256 
1257         .int       L(P0Q9)-L(setPxQx)   /* 72 */
1258         .int       L(P1Q9)-L(setPxQx)
1259         .int       L(P2Q9)-L(setPxQx)
1260         .int       L(P3Q9)-L(setPxQx)
1261         .int       L(P4Q9)-L(setPxQx)
1262         .int       L(P5Q9)-L(setPxQx)
1263         .int       L(P6Q9)-L(setPxQx)
1264         .int       L(P7Q9)-L(setPxQx)   /* 79 */
1265 
1266         .p2align 4
1267 L(P0Q9): mov    %rax, -0x48(%rdi)
1268 L(P0Q8): mov    %rax, -0x40(%rdi)
1269 L(P0Q7): mov    %rax, -0x38(%rdi)
1270 L(P0Q6): mov    %rax, -0x30(%rdi)
1271 L(P0Q5): mov    %rax, -0x28(%rdi)
1272 L(P0Q4): mov    %rax, -0x20(%rdi)
1273 L(P0Q3): mov    %rax, -0x18(%rdi)
1274 L(P0Q2): mov    %rax, -0x10(%rdi)
1275 L(P0Q1): mov    %rax, -0x8(%rdi)
1276 L(P0Q0): 
1277          ret
1278 
1279         .p2align 4
1280 L(P1Q9): mov    %rax, -0x49(%rdi)
1281 L(P1Q8): mov    %rax, -0x41(%rdi)
1282 L(P1Q7): mov    %rax, -0x39(%rdi)
1283 L(P1Q6): mov    %rax, -0x31(%rdi)
1284 L(P1Q5): mov    %rax, -0x29(%rdi)
1285 L(P1Q4): mov    %rax, -0x21(%rdi)
1286 L(P1Q3): mov    %rax, -0x19(%rdi)
1287 L(P1Q2): mov    %rax, -0x11(%rdi)
1288 L(P1Q1): mov    %rax, -0x9(%rdi)
1289 L(P1Q0): mov    %al, -0x1(%rdi)
1290          ret
1291 
1292         .p2align 4
1293 L(P2Q9): mov    %rax, -0x4a(%rdi)
1294 L(P2Q8): mov    %rax, -0x42(%rdi)
1295 L(P2Q7): mov    %rax, -0x3a(%rdi)
1296 L(P2Q6): mov    %rax, -0x32(%rdi)
1297 L(P2Q5): mov    %rax, -0x2a(%rdi)
1298 L(P2Q4): mov    %rax, -0x22(%rdi)
1299 L(P2Q3): mov    %rax, -0x1a(%rdi)
1300 L(P2Q2): mov    %rax, -0x12(%rdi)
1301 L(P2Q1): mov    %rax, -0xa(%rdi)
1302 L(P2Q0): mov    %ax, -0x2(%rdi)
1303          ret
1304 
1305         .p2align 4
1306 L(P3Q9): mov    %rax, -0x4b(%rdi)
1307 L(P3Q8): mov    %rax, -0x43(%rdi)
1308 L(P3Q7): mov    %rax, -0x3b(%rdi)
1309 L(P3Q6): mov    %rax, -0x33(%rdi)
1310 L(P3Q5): mov    %rax, -0x2b(%rdi)
1311 L(P3Q4): mov    %rax, -0x23(%rdi)
1312 L(P3Q3): mov    %rax, -0x1b(%rdi)
1313 L(P3Q2): mov    %rax, -0x13(%rdi)
1314 L(P3Q1): mov    %rax, -0xb(%rdi)
1315 L(P3Q0): mov    %ax, -0x3(%rdi)
1316          mov    %al, -0x1(%rdi)
1317          ret
1318 
1319         .p2align 4
1320 L(P4Q9): mov    %rax, -0x4c(%rdi)
1321 L(P4Q8): mov    %rax, -0x44(%rdi)
1322 L(P4Q7): mov    %rax, -0x3c(%rdi)
1323 L(P4Q6): mov    %rax, -0x34(%rdi)
1324 L(P4Q5): mov    %rax, -0x2c(%rdi)
1325 L(P4Q4): mov    %rax, -0x24(%rdi)
1326 L(P4Q3): mov    %rax, -0x1c(%rdi)
1327 L(P4Q2): mov    %rax, -0x14(%rdi)
1328 L(P4Q1): mov    %rax, -0xc(%rdi)
1329 L(P4Q0): mov    %eax, -0x4(%rdi)
1330          ret
1331 
1332         .p2align 4
1333 L(P5Q9): mov    %rax, -0x4d(%rdi)
1334 L(P5Q8): mov    %rax, -0x45(%rdi)
1335 L(P5Q7): mov    %rax, -0x3d(%rdi)
1336 L(P5Q6): mov    %rax, -0x35(%rdi)
1337 L(P5Q5): mov    %rax, -0x2d(%rdi)
1338 L(P5Q4): mov    %rax, -0x25(%rdi)
1339 L(P5Q3): mov    %rax, -0x1d(%rdi)
1340 L(P5Q2): mov    %rax, -0x15(%rdi)
1341 L(P5Q1): mov    %rax, -0xd(%rdi)
1342 L(P5Q0): mov    %eax, -0x5(%rdi)
1343          mov    %al, -0x1(%rdi)
1344          ret
1345 
1346         .p2align 4
1347 L(P6Q9): mov    %rax, -0x4e(%rdi)
1348 L(P6Q8): mov    %rax, -0x46(%rdi)
1349 L(P6Q7): mov    %rax, -0x3e(%rdi)
1350 L(P6Q6): mov    %rax, -0x36(%rdi)
1351 L(P6Q5): mov    %rax, -0x2e(%rdi)
1352 L(P6Q4): mov    %rax, -0x26(%rdi)
1353 L(P6Q3): mov    %rax, -0x1e(%rdi)
1354 L(P6Q2): mov    %rax, -0x16(%rdi)
1355 L(P6Q1): mov    %rax, -0xe(%rdi)
1356 L(P6Q0): mov    %eax, -0x6(%rdi)
1357          mov    %ax, -0x2(%rdi)
1358          ret
1359 
1360         .p2align 4
1361 L(P7Q9): mov    %rax, -0x4f(%rdi)
1362 L(P7Q8): mov    %rax, -0x47(%rdi)
1363 L(P7Q7): mov    %rax, -0x3f(%rdi)
1364 L(P7Q6): mov    %rax, -0x37(%rdi)
1365 L(P7Q5): mov    %rax, -0x2f(%rdi)
1366 L(P7Q4): mov    %rax, -0x27(%rdi)
1367 L(P7Q3): mov    %rax, -0x1f(%rdi)
1368 L(P7Q2): mov    %rax, -0x17(%rdi)
1369 L(P7Q1): mov    %rax, -0xf(%rdi)
1370 L(P7Q0): mov    %eax, -0x7(%rdi)
1371          mov    %ax, -0x3(%rdi)
1372          mov    %al, -0x1(%rdi)
1373          ret
1374 
1375         /*
1376          * Align to a 16-byte boundary. Avoids penalties from unaligned stores
1377          * as well as from stores spanning cachelines. Note that 16-byte
1378          * alignment is better in the case where rep sstoq is used.
1379          */
1380         .p2align 4
1381 L(ck_align):
1382         test    $0xf, %rdi
1383         jz      L(aligned_now)
1384         test    $1, %rdi
1385         jz      2f
1386         mov     %al, (%rdi)
1387         dec     %rsi
1388         lea     1(%rdi),%rdi
1389 2:
1390         test    $2, %rdi
1391         jz      4f
1392         mov     %ax, (%rdi)
1393         sub     $2, %rsi
1394         lea     2(%rdi),%rdi
1395 4:
1396         test    $4, %rdi
1397         jz      8f
1398         mov     %eax, (%rdi)
1399         sub     $4, %rsi
1400         lea     4(%rdi),%rdi
1401 8:
1402         test    $8, %rdi
1403         jz      L(aligned_now)
1404         mov     %rax, (%rdi)
1405         sub     $8, %rsi
1406         lea     8(%rdi),%rdi
1407 
1408         /*
1409          * For large sizes rep sstoq is fastest.
1410          * The transition point was determined experimentally as measured on
1411          * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1412          */
1413 L(aligned_now):
1414         cmp     $BZERO_USE_REP, %rsi
1415         ja      L(use_rep)
1416 
1417         /*
1418          * Zero 64 bytes per loop
1419          */
1420         .p2align 4
1421 L(bzero_loop):
1422         leaq    -0x40(%rsi), %rsi
1423         cmpq    $0x40, %rsi
1424         movq    %rax, (%rdi) 
1425         movq    %rax, 0x8(%rdi) 
1426         movq    %rax, 0x10(%rdi) 
1427         movq    %rax, 0x18(%rdi) 
1428         movq    %rax, 0x20(%rdi) 
1429         movq    %rax, 0x28(%rdi) 
1430         movq    %rax, 0x30(%rdi) 
1431         movq    %rax, 0x38(%rdi) 
1432         leaq    0x40(%rdi), %rdi
1433         jae     L(bzero_loop)
1434 
1435         /*
1436          * Clear any remaining bytes.
1437          */
1438 9:
1439         leaq    L(setPxQx)(%rip), %r10
1440         addq    %rsi, %rdi
1441         movslq  (%r10,%rsi,4), %rcx
1442         leaq    (%rcx,%r10,1), %r10
1443         jmpq    *%r10
1444 
1445         /*
1446          * Use rep sstoq. Clear any remainder via unrolled code
1447          */
1448         .p2align 4
1449 L(use_rep):
1450         movq    %rsi, %rcx              /* get size in bytes */
1451         shrq    $3, %rcx                /* count of 8-byte words to zero */
1452         rep
1453           sstoq                         /* %rcx = words to clear (%rax=0) */
1454         andq    $7, %rsi                /* remaining bytes */
1455         jnz     9b
1456         ret
1457 #undef  L
1458         SET_SIZE(bzero_altentry)
1459         SET_SIZE(bzero)
1460 
1461 #elif defined(__i386)
1462 
1463 #define ARG_ADDR        4
1464 #define ARG_COUNT       8
1465 
1466         ENTRY(bzero)
1467 #ifdef DEBUG
1468         movl    postbootkernelbase, %eax
1469         cmpl    %eax, ARG_ADDR(%esp)
1470         jnb     0f
1471         pushl   %ebp
1472         movl    %esp, %ebp
1473         pushl   $.bzero_panic_msg
1474         call    panic
1475 0:
1476 #endif
1477 do_zero:
1478         movl    %edi, %edx
1479         movl    ARG_COUNT(%esp), %ecx
1480         movl    ARG_ADDR(%esp), %edi
1481         shrl    $2, %ecx
1482         xorl    %eax, %eax
1483         rep
1484           sstol
1485         movl    ARG_COUNT(%esp), %ecx
1486         andl    $3, %ecx
1487         rep
1488           sstob
1489         movl    %edx, %edi
1490         ret
1491         SET_SIZE(bzero)
1492 
1493 #undef  ARG_ADDR
1494 #undef  ARG_COUNT
1495 
1496 #endif  /* __i386 */
1497 #endif  /* __lint */
1498 
1499 /*
1500  * Transfer data to and from user space -
1501  * Note that these routines can cause faults.
1502  * It is assumed that the kernel has nothing at
1503  * less than KERNELBASE in the virtual address space.
1504  *
1505  * Note that copyin(9F) and copyout(9F) are part of the
1506  * DDI/DKI which specifies that they return '-1' on "errors."
1507  *
1508  * Sigh.
1509  *
1510  * So there are two extremely similar routines - xcopyin_nta() and
1511  * xcopyout_nta() which return the errno that we've faithfully computed.
1512  * This allows other callers (e.g. uiomove(9F)) to work correctly.
1513  * Given that these are used pretty heavily, we expand the calling
1514  * sequences inline for all flavours (rather than making wrappers).
1515  */
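
/*
 * Illustrative only (a hypothetical caller, not part of this file): a DDI
 * consumer checks copyin(9F)/copyout(9F) against 0/-1, while something like
 * uiomove(9F) wants the real errno that xcopyin_nta() computes:
 *
 *      if (copyin(uaddr, kaddr, len) != 0)
 *              return (EFAULT);
 *
 *      if ((error = xcopyin_nta(uaddr, kaddr, len, 0)) != 0)
 *              return (error);
 */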
1516 
1517 /*
1518  * Copy user data to kernel space.
1519  */
1520 
1521 #if defined(__lint)
1522 
1523 /* ARGSUSED */
1524 int
1525 copyin(const void *uaddr, void *kaddr, size_t count)
1526 { return (0); }
1527 
1528 #else   /* lint */
1529 
1530 #if defined(__amd64)
1531 
1532         ENTRY(copyin)
1533         pushq   %rbp
1534         movq    %rsp, %rbp
1535         subq    $24, %rsp
1536 
1537         /*
1538          * save args in case we trap and need to rerun as a copyop
1539          */
1540         movq    %rdi, (%rsp)
1541         movq    %rsi, 0x8(%rsp)
1542         movq    %rdx, 0x10(%rsp)
1543 
1544         movq    kernelbase(%rip), %rax
1545 #ifdef DEBUG
1546         cmpq    %rax, %rsi              /* %rsi = kaddr */
1547         jnb     1f
1548         leaq    .copyin_panic_msg(%rip), %rdi
1549         xorl    %eax, %eax
1550         call    panic
1551 1:
1552 #endif
1553         /*
1554          * pass lofault value as 4th argument to do_copy_fault
1555          */
1556         leaq    _copyin_err(%rip), %rcx
1557 
1558         movq    %gs:CPU_THREAD, %r9
1559         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1560         jae     3f                      /* take copyop if uaddr >= kernelbase */
1561         SMAP_DISABLE_INSTR(0)
1562         jmp     do_copy_fault           /* Takes care of leave for us */
1563 
1564 _copyin_err:
1565         SMAP_ENABLE_INSTR(2)
1566         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */  
1567         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1568 3:
1569         movq    T_COPYOPS(%r9), %rax
1570         cmpq    $0, %rax
1571         jz      2f
1572         /*
1573          * reload args for the copyop
1574          */
1575         movq    (%rsp), %rdi
1576         movq    0x8(%rsp), %rsi
1577         movq    0x10(%rsp), %rdx
1578         leave
1579         jmp     *CP_COPYIN(%rax)
1580 
1581 2:      movl    $-1, %eax       
1582         leave
1583         ret
1584         SET_SIZE(copyin)
1585 
1586 #elif defined(__i386)
1587 
1588 #define ARG_UADDR       4
1589 #define ARG_KADDR       8
1590 
1591         ENTRY(copyin)
1592         movl    kernelbase, %ecx
1593 #ifdef DEBUG
1594         cmpl    %ecx, ARG_KADDR(%esp)
1595         jnb     1f
1596         pushl   %ebp
1597         movl    %esp, %ebp
1598         pushl   $.copyin_panic_msg
1599         call    panic
1600 1:
1601 #endif
1602         lea     _copyin_err, %eax
1603 
1604         movl    %gs:CPU_THREAD, %edx
1605         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1606         jb      do_copy_fault
1607         jmp     3f
1608 
1609 _copyin_err:
1610         popl    %ecx
1611         popl    %edi
1612         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1613         popl    %esi
1614         popl    %ebp
1615 3:
1616         movl    T_COPYOPS(%edx), %eax
1617         cmpl    $0, %eax
1618         jz      2f
1619         jmp     *CP_COPYIN(%eax)
1620 
1621 2:      movl    $-1, %eax
1622         ret
1623         SET_SIZE(copyin)
1624 
1625 #undef  ARG_UADDR
1626 #undef  ARG_KADDR
1627 
1628 #endif  /* __i386 */
1629 #endif  /* __lint */
1630 
1631 #if defined(__lint)
1632 
1633 /* ARGSUSED */
1634 int
1635 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1636 { return (0); }
1637 
1638 #else   /* __lint */
1639 
1640 #if defined(__amd64)
1641 
1642         ENTRY(xcopyin_nta)
1643         pushq   %rbp
1644         movq    %rsp, %rbp
1645         subq    $24, %rsp
1646 
1647         /*
1648          * save args in case we trap and need to rerun as a copyop
1649          * %rcx is consumed in this routine so we don't need to save
1650          * it.
1651          */
1652         movq    %rdi, (%rsp)
1653         movq    %rsi, 0x8(%rsp)
1654         movq    %rdx, 0x10(%rsp)
1655 
1656         movq    kernelbase(%rip), %rax
1657 #ifdef DEBUG
1658         cmpq    %rax, %rsi              /* %rsi = kaddr */
1659         jnb     1f
1660         leaq    .xcopyin_panic_msg(%rip), %rdi
1661         xorl    %eax, %eax
1662         call    panic
1663 1:
1664 #endif
1665         movq    %gs:CPU_THREAD, %r9
1666         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1667         jae     4f
1668         cmpq    $0, %rcx                /* No non-temporal access? */
1669         /*
1670          * pass lofault value as 4th argument to do_copy_fault
1671          */
1672         leaq    _xcopyin_err(%rip), %rcx        /* doesn't set rflags */
1673         jnz     6f                      /* use regular access */
1674         /*
1675          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1676          */
1677         cmpq    $XCOPY_MIN_SIZE, %rdx
1678         jae     5f
1679 6:
1680         SMAP_DISABLE_INSTR(1)
1681         jmp     do_copy_fault
1682         
1683         /*
1684          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1685          * count is COUNT_ALIGN_SIZE aligned.
1686          */
1687 5:
1688         movq    %rdi, %r10
1689         orq     %rsi, %r10
1690         andq    $NTA_ALIGN_MASK, %r10
1691         orq     %rdx, %r10
1692         andq    $COUNT_ALIGN_MASK, %r10
1693         jnz     6b      
1694         leaq    _xcopyin_nta_err(%rip), %rcx    /* doesn't set rflags */
1695         SMAP_DISABLE_INSTR(2)
1696         jmp     do_copy_fault_nta       /* use non-temporal access */
1697         
1698 4:
1699         movl    $EFAULT, %eax
1700         jmp     3f
1701 
1702         /*
1703          * A fault during do_copy_fault or do_copy_fault_nta is
1704          * indicated through an errno value in %rax and we iret from the
1705          * trap handler to here.
1706          */
1707 _xcopyin_err:
1708         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1709 _xcopyin_nta_err:
1710         SMAP_ENABLE_INSTR(3)
1711         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1712 3:
1713         movq    T_COPYOPS(%r9), %r8
1714         cmpq    $0, %r8
1715         jz      2f
1716 
1717         /*
1718          * reload args for the copyop
1719          */
1720         movq    (%rsp), %rdi
1721         movq    0x8(%rsp), %rsi
1722         movq    0x10(%rsp), %rdx
1723         leave
1724         jmp     *CP_XCOPYIN(%r8)
1725 
1726 2:      leave
1727         ret
1728         SET_SIZE(xcopyin_nta)
1729 
1730 #elif defined(__i386)
1731 
1732 #define ARG_UADDR       4
1733 #define ARG_KADDR       8
1734 #define ARG_COUNT       12
1735 #define ARG_CACHED      16
1736 
1737         .globl  use_sse_copy
1738 
1739         ENTRY(xcopyin_nta)
1740         movl    kernelbase, %ecx
1741         lea     _xcopyin_err, %eax
1742         movl    %gs:CPU_THREAD, %edx
1743         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1744         jae     4f
1745 
1746         cmpl    $0, use_sse_copy        /* no sse support */
1747         jz      do_copy_fault
1748 
1749         cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
1750         jnz     do_copy_fault
1751 
1752         /*
1753          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1754          */
1755         cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1756         jb      do_copy_fault
1757         
1758         /*
1759          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1760          * count is COUNT_ALIGN_SIZE aligned.
1761          */
1762         movl    ARG_UADDR(%esp), %ecx
1763         orl     ARG_KADDR(%esp), %ecx
1764         andl    $NTA_ALIGN_MASK, %ecx
1765         orl     ARG_COUNT(%esp), %ecx
1766         andl    $COUNT_ALIGN_MASK, %ecx
1767         jnz     do_copy_fault
1768 
1769         jmp     do_copy_fault_nta       /* use non-temporal access */
1770 
1771 4:
1772         movl    $EFAULT, %eax
1773         jmp     3f
1774 
1775         /*
1776          * A fault during do_copy_fault or do_copy_fault_nta is
1777          * indicated through an errno value in %eax and we iret from the
1778          * trap handler to here.
1779          */
1780 _xcopyin_err:
1781         popl    %ecx
1782         popl    %edi
1783         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1784         popl    %esi
1785         popl    %ebp
1786 3:
1787         cmpl    $0, T_COPYOPS(%edx)
1788         jz      2f
1789         movl    T_COPYOPS(%edx), %eax
1790         jmp     *CP_XCOPYIN(%eax)
1791 
1792 2:      rep;    ret     /* use 2 byte return instruction when branch target */
1793                         /* AMD Software Optimization Guide - Section 6.2 */
1794         SET_SIZE(xcopyin_nta)
1795 
1796 #undef  ARG_UADDR
1797 #undef  ARG_KADDR
1798 #undef  ARG_COUNT
1799 #undef  ARG_CACHED
1800 
1801 #endif  /* __i386 */
1802 #endif  /* __lint */
1803 
1804 /*
1805  * Copy kernel data to user space.
1806  */
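/*
 * Illustrative sketch only (not part of this file): the expected
 * consumer-side pattern for copyout(), e.g. from a hypothetical ioctl
 * handler.  Only the signature and the 0 / -1 return convention come
 * from the code below; the function and variable names are made up.
 *
 *      int
 *      mydrv_get_stats(mydrv_stats_t *ksp, intptr_t arg)
 *      {
 *              if (copyout(ksp, (void *)arg, sizeof (*ksp)) != 0)
 *                      return (EFAULT);
 *              return (0);
 *      }
 */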
1807 
1808 #if defined(__lint)
1809 
1810 /* ARGSUSED */
1811 int
1812 copyout(const void *kaddr, void *uaddr, size_t count)
1813 { return (0); }
1814 
1815 #else   /* __lint */
1816 
1817 #if defined(__amd64)
1818 
1819         ENTRY(copyout)
1820         pushq   %rbp
1821         movq    %rsp, %rbp
1822         subq    $24, %rsp
1823 
1824         /*
1825          * save args in case we trap and need to rerun as a copyop
1826          */
1827         movq    %rdi, (%rsp)
1828         movq    %rsi, 0x8(%rsp)
1829         movq    %rdx, 0x10(%rsp)
1830 
1831         movq    kernelbase(%rip), %rax
1832 #ifdef DEBUG
1833         cmpq    %rax, %rdi              /* %rdi = kaddr */
1834         jnb     1f
1835         leaq    .copyout_panic_msg(%rip), %rdi
1836         xorl    %eax, %eax
1837         call    panic
1838 1:
1839 #endif
1840         /*
1841          * pass lofault value as 4th argument to do_copy_fault
1842          */
1843         leaq    _copyout_err(%rip), %rcx
1844 
1845         movq    %gs:CPU_THREAD, %r9
1846         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1847         jae     3f                      /* take copyop if uaddr >= kernelbase */
1848         SMAP_DISABLE_INSTR(3)
1849         jmp     do_copy_fault           /* Calls leave for us */
1850 
1851 _copyout_err:
1852         SMAP_ENABLE_INSTR(4)
1853         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1854         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1855 3:
1856         movq    T_COPYOPS(%r9), %rax
1857         cmpq    $0, %rax
1858         jz      2f
1859 
1860         /*
1861          * reload args for the copyop
1862          */
1863         movq    (%rsp), %rdi
1864         movq    0x8(%rsp), %rsi
1865         movq    0x10(%rsp), %rdx
1866         leave
1867         jmp     *CP_COPYOUT(%rax)
1868 
1869 2:      movl    $-1, %eax
1870         leave
1871         ret
1872         SET_SIZE(copyout)
1873 
1874 #elif defined(__i386)
1875 
1876 #define ARG_KADDR       4
1877 #define ARG_UADDR       8
1878 
1879         ENTRY(copyout)
1880         movl    kernelbase, %ecx
1881 #ifdef DEBUG
1882         cmpl    %ecx, ARG_KADDR(%esp)
1883         jnb     1f
1884         pushl   %ebp
1885         movl    %esp, %ebp
1886         pushl   $.copyout_panic_msg
1887         call    panic
1888 1:
1889 #endif
1890         lea     _copyout_err, %eax
1891         movl    %gs:CPU_THREAD, %edx
1892         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1893         jb      do_copy_fault
1894         jmp     3f
1895         
1896 _copyout_err:
1897         popl    %ecx
1898         popl    %edi
1899         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1900         popl    %esi
1901         popl    %ebp
1902 3:
1903         movl    T_COPYOPS(%edx), %eax
1904         cmpl    $0, %eax
1905         jz      2f
1906         jmp     *CP_COPYOUT(%eax)
1907 
1908 2:      movl    $-1, %eax
1909         ret
1910         SET_SIZE(copyout)
1911 
1912 #undef  ARG_UADDR
1913 #undef  ARG_KADDR
1914 
1915 #endif  /* __i386 */
1916 #endif  /* __lint */
1917 
1918 #if defined(__lint)
1919 
1920 /* ARGSUSED */
1921 int
1922 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1923 { return (0); }
1924 
1925 #else   /* __lint */
1926 
1927 #if defined(__amd64)
1928 
1929         ENTRY(xcopyout_nta)
1930         pushq   %rbp
1931         movq    %rsp, %rbp
1932         subq    $24, %rsp
1933 
1934         /*
1935          * save args in case we trap and need to rerun as a copyop
1936          */
1937         movq    %rdi, (%rsp)
1938         movq    %rsi, 0x8(%rsp)
1939         movq    %rdx, 0x10(%rsp)
1940 
1941         movq    kernelbase(%rip), %rax
1942 #ifdef DEBUG
1943         cmpq    %rax, %rdi              /* %rdi = kaddr */
1944         jnb     1f
1945         leaq    .xcopyout_panic_msg(%rip), %rdi
1946         xorl    %eax, %eax
1947         call    panic
1948 1:
1949 #endif
1950         movq    %gs:CPU_THREAD, %r9
1951         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1952         jae     4f
1953 
1954         cmpq    $0, %rcx                /* No non-temporal access? */
1955         /*
1956          * pass lofault value as 4th argument to do_copy_fault
1957          */
1958         leaq    _xcopyout_err(%rip), %rcx
1959         jnz     6f
1960         /*
1961          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1962          */
1963         cmpq    $XCOPY_MIN_SIZE, %rdx
1964         jae     5f
1965 6:
1966         SMAP_DISABLE_INSTR(4)
1967         jmp     do_copy_fault
1968         
1969         /*
1970          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1971          * count is COUNT_ALIGN_SIZE aligned.
1972          */
1973 5:
1974         movq    %rdi, %r10
1975         orq     %rsi, %r10
1976         andq    $NTA_ALIGN_MASK, %r10
1977         orq     %rdx, %r10
1978         andq    $COUNT_ALIGN_MASK, %r10
1979         jnz     6b      
1980         leaq    _xcopyout_nta_err(%rip), %rcx
1981         SMAP_DISABLE_INSTR(5)
1982         call    do_copy_fault_nta
1983         SMAP_ENABLE_INSTR(5)
1984         ret
1985 
1986 4:
1987         movl    $EFAULT, %eax
1988         jmp     3f
1989 
1990         /*
1991          * A fault during do_copy_fault or do_copy_fault_nta is
1992          * indicated through an errno value in %rax and we iret from the
1993          * trap handler to here.
1994          */
1995 _xcopyout_err:
1996         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1997 _xcopyout_nta_err:
1998         SMAP_ENABLE_INSTR(6)
1999         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2000 3:
2001         movq    T_COPYOPS(%r9), %r8
2002         cmpq    $0, %r8
2003         jz      2f
2004 
2005         /*
2006          * reload args for the copyop
2007          */
2008         movq    (%rsp), %rdi
2009         movq    0x8(%rsp), %rsi
2010         movq    0x10(%rsp), %rdx
2011         leave
2012         jmp     *CP_XCOPYOUT(%r8)
2013 
2014 2:      leave
2015         ret
2016         SET_SIZE(xcopyout_nta)
2017 
2018 #elif defined(__i386)
2019 
2020 #define ARG_KADDR       4
2021 #define ARG_UADDR       8
2022 #define ARG_COUNT       12
2023 #define ARG_CACHED      16
2024 
2025         ENTRY(xcopyout_nta)
2026         movl    kernelbase, %ecx
2027         lea     _xcopyout_err, %eax
2028         movl    %gs:CPU_THREAD, %edx
2029         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2030         jae     4f
2031 
2032         cmpl    $0, use_sse_copy        /* no sse support */
2033         jz      do_copy_fault
2034 
2035         cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
2036         jnz     do_copy_fault
2037 
2038         /*
2039          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2040          */
2041         cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
2042         jb      do_copy_fault
2043         
2044         /*
2045          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2046          * count is COUNT_ALIGN_SIZE aligned.
2047          */
2048         movl    ARG_UADDR(%esp), %ecx
2049         orl     ARG_KADDR(%esp), %ecx
2050         andl    $NTA_ALIGN_MASK, %ecx
2051         orl     ARG_COUNT(%esp), %ecx
2052         andl    $COUNT_ALIGN_MASK, %ecx
2053         jnz     do_copy_fault
2054         jmp     do_copy_fault_nta
2055 
2056 4:
2057         movl    $EFAULT, %eax
2058         jmp     3f
2059 
2060         /*
2061          * A fault during do_copy_fault or do_copy_fault_nta is
2062          * indicated through an errno value in %eax and we iret from the
2063          * trap handler to here.
2064          */
2065 _xcopyout_err:
2066         /* restore the original lofault */
2067         popl    %ecx
2068         popl    %edi
2069         movl    %ecx, T_LOFAULT(%edx)   /* original lofault */
2070         popl    %esi
2071         popl    %ebp
2072 3:
2073         cmpl    $0, T_COPYOPS(%edx)
2074         jz      2f
2075         movl    T_COPYOPS(%edx), %eax
2076         jmp     *CP_XCOPYOUT(%eax)
2077 
2078 2:      rep;    ret     /* use 2 byte return instruction when branch target */
2079                         /* AMD Software Optimization Guide - Section 6.2 */
2080         SET_SIZE(xcopyout_nta)
2081 
2082 #undef  ARG_UADDR
2083 #undef  ARG_KADDR
2084 #undef  ARG_COUNT
2085 #undef  ARG_CACHED
2086 
2087 #endif  /* __i386 */
2088 #endif  /* __lint */
2089 
2090 /*
2091  * Copy a null terminated string from one point to another in
2092  * the kernel address space.
2093  */
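/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * kernel-to-kernel string copy via copystr().  On success 0 is returned
 * and the length stored through lencopied includes the terminating NUL;
 * ENAMETOOLONG is returned if maxlength is exhausted first.
 *
 *      char kbuf[MAXNAMELEN];
 *      size_t len;
 *      int err;
 *
 *      if ((err = copystr(src_name, kbuf, sizeof (kbuf), &len)) != 0)
 *              return (err);
 *      ASSERT(len <= sizeof (kbuf));
 */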
2094 
2095 #if defined(__lint)
2096 
2097 /* ARGSUSED */
2098 int
2099 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2100 { return (0); }
2101 
2102 #else   /* __lint */
2103 
2104 #if defined(__amd64)
2105 
2106         ENTRY(copystr)
2107         pushq   %rbp
2108         movq    %rsp, %rbp
2109 #ifdef DEBUG
2110         movq    kernelbase(%rip), %rax
2111         cmpq    %rax, %rdi              /* %rdi = from */
2112         jb      0f
2113         cmpq    %rax, %rsi              /* %rsi = to */
2114         jnb     1f
2115 0:      leaq    .copystr_panic_msg(%rip), %rdi
2116         xorl    %eax, %eax
2117         call    panic
2118 1:
2119 #endif
2120         movq    %gs:CPU_THREAD, %r9
2121         movq    T_LOFAULT(%r9), %r8     /* pass current lofault value as */
2122                                         /* 5th argument to do_copystr */
2123         xorl    %r10d,%r10d             /* pass smap restore need in %r10d */
2124                                         /* as a non-ABI 6th arg */
2125 do_copystr:
2126         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
2127         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
2128         movq    %r8, T_LOFAULT(%r9)     /* new lofault */
2129 
2130         movq    %rdx, %r8               /* save maxlength */
2131 
2132         cmpq    $0, %rdx                /* %rdx = maxlength */
2133         je      copystr_enametoolong    /* maxlength == 0 */
2134 
2135 copystr_loop:
2136         decq    %r8
2137         movb    (%rdi), %al
2138         incq    %rdi
2139         movb    %al, (%rsi)
2140         incq    %rsi
2141         cmpb    $0, %al
2142         je      copystr_null            /* null char */
2143         cmpq    $0, %r8
2144         jne     copystr_loop
2145 
2146 copystr_enametoolong:
2147         movl    $ENAMETOOLONG, %eax
2148         jmp     copystr_out
2149 
2150 copystr_null:
2151         xorl    %eax, %eax              /* no error */
2152 
2153 copystr_out:
2154         cmpq    $0, %rcx                /* want length? */
2155         je      copystr_smap            /* no */
2156         subq    %r8, %rdx               /* compute length and store it */
2157         movq    %rdx, (%rcx)
2158 
2159 copystr_smap:
2160         cmpl    $0, %r10d
2161         jz      copystr_done
2162         SMAP_ENABLE_INSTR(7)
2163 
2164 copystr_done:
2165         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2166         leave
2167         ret
2168         SET_SIZE(copystr)
2169 
2170 #elif defined(__i386)
2171 
2172 #define ARG_FROM        8
2173 #define ARG_TO          12
2174 #define ARG_MAXLEN      16
2175 #define ARG_LENCOPIED   20
2176 
2177         ENTRY(copystr)
2178 #ifdef DEBUG
2179         pushl   %ebp
2180         movl    %esp, %ebp
2181         movl    kernelbase, %eax
2182         cmpl    %eax, ARG_FROM(%esp)
2183         jb      0f
2184         cmpl    %eax, ARG_TO(%esp)
2185         jnb     1f
2186 0:      pushl   $.copystr_panic_msg
2187         call    panic
2188 1:      popl    %ebp
2189 #endif
2190         /* get the current lofault address */
2191         movl    %gs:CPU_THREAD, %eax
2192         movl    T_LOFAULT(%eax), %eax
2193 do_copystr:
2194         pushl   %ebp                    /* setup stack frame */
2195         movl    %esp, %ebp
2196         pushl   %ebx                    /* save registers */
2197         pushl   %edi
2198 
2199         movl    %gs:CPU_THREAD, %ebx    
2200         movl    T_LOFAULT(%ebx), %edi
2201         pushl   %edi                    /* save the current lofault */
2202         movl    %eax, T_LOFAULT(%ebx)   /* new lofault */
2203 
2204         movl    ARG_MAXLEN(%ebp), %ecx
2205         cmpl    $0, %ecx
2206         je      copystr_enametoolong    /* maxlength == 0 */
2207 
2208         movl    ARG_FROM(%ebp), %ebx    /* source address */
2209         movl    ARG_TO(%ebp), %edx      /* destination address */
2210 
2211 copystr_loop:
2212         decl    %ecx
2213         movb    (%ebx), %al
2214         incl    %ebx    
2215         movb    %al, (%edx)
2216         incl    %edx
2217         cmpb    $0, %al
2218         je      copystr_null            /* null char */
2219         cmpl    $0, %ecx
2220         jne     copystr_loop
2221 
2222 copystr_enametoolong:
2223         movl    $ENAMETOOLONG, %eax
2224         jmp     copystr_out
2225 
2226 copystr_null:
2227         xorl    %eax, %eax              /* no error */
2228 
2229 copystr_out:
2230         cmpl    $0, ARG_LENCOPIED(%ebp) /* want length? */
2231         je      copystr_done            /* no */
2232         movl    ARG_MAXLEN(%ebp), %edx
2233         subl    %ecx, %edx              /* compute length and store it */
2234         movl    ARG_LENCOPIED(%ebp), %ecx
2235         movl    %edx, (%ecx)
2236 
2237 copystr_done:
2238         popl    %edi
2239         movl    %gs:CPU_THREAD, %ebx    
2240         movl    %edi, T_LOFAULT(%ebx)   /* restore the original lofault */
2241 
2242         popl    %edi
2243         popl    %ebx
2244         popl    %ebp
2245         ret     
2246         SET_SIZE(copystr)
2247 
2248 #undef  ARG_FROM
2249 #undef  ARG_TO
2250 #undef  ARG_MAXLEN
2251 #undef  ARG_LENCOPIED
2252 
2253 #endif  /* __i386 */
2254 #endif  /* __lint */
2255 
2256 /*
2257  * Copy a null terminated string from the user address space into
2258  * the kernel address space.
2259  */
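/*
 * Illustrative sketch (hypothetical caller): bringing a user-supplied
 * path into the kernel with copyinstr(); copyoutstr() below is the
 * mirror image in the other direction.  The return values used here
 * (0, EFAULT, ENAMETOOLONG) follow the code in this file.
 *
 *      char path[MAXPATHLEN];
 *      size_t len;
 *      int err;
 *
 *      if ((err = copyinstr(upath, path, sizeof (path), &len)) != 0)
 *              return (err);
 */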
2260 
2261 #if defined(__lint)
2262 
2263 /* ARGSUSED */
2264 int
2265 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2266     size_t *lencopied)
2267 { return (0); }
2268 
2269 #else   /* __lint */
2270 
2271 #if defined(__amd64)
2272 
2273         ENTRY(copyinstr)
2274         pushq   %rbp
2275         movq    %rsp, %rbp
2276         subq    $32, %rsp
2277 
2278         /*
2279          * save args in case we trap and need to rerun as a copyop
2280          */
2281         movq    %rdi, (%rsp)
2282         movq    %rsi, 0x8(%rsp)
2283         movq    %rdx, 0x10(%rsp)
2284         movq    %rcx, 0x18(%rsp)
2285 
2286         movq    kernelbase(%rip), %rax
2287 #ifdef DEBUG
2288         cmpq    %rax, %rsi              /* %rsi = kaddr */
2289         jnb     1f
2290         leaq    .copyinstr_panic_msg(%rip), %rdi
2291         xorl    %eax, %eax
2292         call    panic
2293 1:
2294 #endif
2295         /*
2296          * pass lofault value as 5th argument to do_copystr
2297          * and the SMAP restore flag that do_copystr expects in %r10d
2298          */
2299         leaq    _copyinstr_error(%rip), %r8
2300         movl    $1, %r10d
2301 
2302         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
2303         jae     4f
2304         SMAP_DISABLE_INSTR(6)
2305         jmp     do_copystr
2306 4:
2307         movq    %gs:CPU_THREAD, %r9
2308         jmp     3f
2309 
2310 _copyinstr_error:
2311         SMAP_ENABLE_INSTR(8)
2312         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2313 3:
2314         movq    T_COPYOPS(%r9), %rax
2315         cmpq    $0, %rax
2316         jz      2f
2317 
2318         /*
2319          * reload args for the copyop
2320          */
2321         movq    (%rsp), %rdi
2322         movq    0x8(%rsp), %rsi
2323         movq    0x10(%rsp), %rdx
2324         movq    0x18(%rsp), %rcx
2325         leave
2326         jmp     *CP_COPYINSTR(%rax)
2327         
2328 2:      movl    $EFAULT, %eax           /* return EFAULT */
2329         leave
2330         ret
2331         SET_SIZE(copyinstr)
2332 
2333 #elif defined(__i386)
2334 
2335 #define ARG_UADDR       4
2336 #define ARG_KADDR       8
2337 
2338         ENTRY(copyinstr)
2339         movl    kernelbase, %ecx
2340 #ifdef DEBUG
2341         cmpl    %ecx, ARG_KADDR(%esp)
2342         jnb     1f
2343         pushl   %ebp
2344         movl    %esp, %ebp
2345         pushl   $.copyinstr_panic_msg
2346         call    panic
2347 1:
2348 #endif
2349         lea     _copyinstr_error, %eax
2350         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2351         jb      do_copystr
2352         movl    %gs:CPU_THREAD, %edx
2353         jmp     3f
2354 
2355 _copyinstr_error:
2356         popl    %edi
2357         movl    %gs:CPU_THREAD, %edx    
2358         movl    %edi, T_LOFAULT(%edx)   /* original lofault */
2359 
2360         popl    %edi
2361         popl    %ebx
2362         popl    %ebp
2363 3:
2364         movl    T_COPYOPS(%edx), %eax
2365         cmpl    $0, %eax
2366         jz      2f
2367         jmp     *CP_COPYINSTR(%eax)
2368         
2369 2:      movl    $EFAULT, %eax           /* return EFAULT */
2370         ret
2371         SET_SIZE(copyinstr)
2372 
2373 #undef  ARG_UADDR
2374 #undef  ARG_KADDR
2375 
2376 #endif  /* __i386 */
2377 #endif  /* __lint */
2378 
2379 /*
2380  * Copy a null terminated string from the kernel
2381  * address space to the user address space.
2382  */
2383 
2384 #if defined(__lint)
2385 
2386 /* ARGSUSED */
2387 int
2388 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2389     size_t *lencopied)
2390 { return (0); }
2391 
2392 #else   /* __lint */
2393 
2394 #if defined(__amd64)
2395 
2396         ENTRY(copyoutstr)
2397         pushq   %rbp
2398         movq    %rsp, %rbp
2399         subq    $32, %rsp
2400 
2401         /*
2402          * save args in case we trap and need to rerun as a copyop
2403          */
2404         movq    %rdi, (%rsp)
2405         movq    %rsi, 0x8(%rsp)
2406         movq    %rdx, 0x10(%rsp)
2407         movq    %rcx, 0x18(%rsp)
2408 
2409         movq    kernelbase(%rip), %rax
2410 #ifdef DEBUG
2411         cmpq    %rax, %rdi              /* %rdi = kaddr */
2412         jnb     1f
2413         leaq    .copyoutstr_panic_msg(%rip), %rdi
2414         jmp     call_panic              /* setup stack and call panic */
2415 1:
2416 #endif
2417         /*
2418          * pass lofault value as 5th argument to do_copystr
2419          * pass 1 (SMAP restore needed) as 6th argument to do_copystr in %r10d
2420          */
2421         leaq    _copyoutstr_error(%rip), %r8
2422         movl    $1, %r10d
2423 
2424         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
2425         jae     4f
2426         SMAP_DISABLE_INSTR(7)
2427         jmp     do_copystr
2428 4:
2429         movq    %gs:CPU_THREAD, %r9
2430         jmp     3f
2431 
2432 _copyoutstr_error:
2433         SMAP_ENABLE_INSTR(9)
2434         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2435 3:
2436         movq    T_COPYOPS(%r9), %rax
2437         cmpq    $0, %rax
2438         jz      2f
2439 
2440         /*
2441          * reload args for the copyop
2442          */
2443         movq    (%rsp), %rdi
2444         movq    0x8(%rsp), %rsi
2445         movq    0x10(%rsp), %rdx
2446         movq    0x18(%rsp), %rcx
2447         leave
2448         jmp     *CP_COPYOUTSTR(%rax)
2449         
2450 2:      movl    $EFAULT, %eax           /* return EFAULT */
2451         leave
2452         ret
2453         SET_SIZE(copyoutstr)    
2454         
2455 #elif defined(__i386)
2456 
2457 #define ARG_KADDR       4
2458 #define ARG_UADDR       8
2459 
2460         ENTRY(copyoutstr)
2461         movl    kernelbase, %ecx
2462 #ifdef DEBUG
2463         cmpl    %ecx, ARG_KADDR(%esp)
2464         jnb     1f
2465         pushl   %ebp
2466         movl    %esp, %ebp
2467         pushl   $.copyoutstr_panic_msg
2468         call    panic
2469 1:
2470 #endif
2471         lea     _copyoutstr_error, %eax
2472         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2473         jb      do_copystr
2474         movl    %gs:CPU_THREAD, %edx
2475         jmp     3f
2476 
2477 _copyoutstr_error:
2478         popl    %edi
2479         movl    %gs:CPU_THREAD, %edx    
2480         movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
2481 
2482         popl    %edi
2483         popl    %ebx
2484         popl    %ebp
2485 3:
2486         movl    T_COPYOPS(%edx), %eax
2487         cmpl    $0, %eax
2488         jz      2f
2489         jmp     *CP_COPYOUTSTR(%eax)
2490 
2491 2:      movl    $EFAULT, %eax           /* return EFAULT */
2492         ret
2493         SET_SIZE(copyoutstr)
2494         
2495 #undef  ARG_KADDR
2496 #undef  ARG_UADDR
2497 
2498 #endif  /* __i386 */
2499 #endif  /* __lint */
2500 
2501 /*
2502  * Since all of the fuword() variants are so similar, we have a macro to spit
2503  * them out.  This allows us to create DTrace-unobservable functions easily.
2504  */
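/*
 * Illustrative sketch of the contract the FUWORD() expansions provide to
 * their callers: fetch a naturally aligned word from user space, returning
 * 0 on success and -1 on a fault (or after deferring to the installed
 * copyops).  The variable names are hypothetical.
 *
 *      uint32_t uval;
 *
 *      if (fuword32(uaddr, &uval) == -1)
 *              return (EFAULT);
 */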
2505         
2506 #if defined(__lint)
2507 
2508 #if defined(__amd64)
2509 
2510 /* ARGSUSED */
2511 int
2512 fuword64(const void *addr, uint64_t *dst)
2513 { return (0); }
2514 
2515 #endif
2516 
2517 /* ARGSUSED */
2518 int
2519 fuword32(const void *addr, uint32_t *dst)
2520 { return (0); }
2521 
2522 /* ARGSUSED */
2523 int
2524 fuword16(const void *addr, uint16_t *dst)
2525 { return (0); }
2526 
2527 /* ARGSUSED */
2528 int
2529 fuword8(const void *addr, uint8_t *dst)
2530 { return (0); }
2531 
2532 #else   /* __lint */
2533 
2534 #if defined(__amd64)
2535 
2536 /*
2537  * Note that we don't save and reload the arguments here
2538  * because their values are not altered in the copy path.
2539  * Additionally, on success we re-enable SMAP and return
2540  * directly to our original caller.
2541  */
2542 
2543 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2544         ENTRY(NAME)                             \
2545         movq    %gs:CPU_THREAD, %r9;            \
2546         cmpq    kernelbase(%rip), %rdi;         \
2547         jae     1f;                             \
2548         leaq    _flt_/**/NAME, %rdx;            \
2549         movq    %rdx, T_LOFAULT(%r9);           \
2550         SMAP_DISABLE_INSTR(DISNUM)              \
2551         INSTR   (%rdi), REG;                    \
2552         movq    $0, T_LOFAULT(%r9);             \
2553         INSTR   REG, (%rsi);                    \
2554         xorl    %eax, %eax;                     \
2555         SMAP_ENABLE_INSTR(EN1)                  \
2556         ret;                                    \
2557 _flt_/**/NAME:                                  \
2558         SMAP_ENABLE_INSTR(EN2)                  \
2559         movq    $0, T_LOFAULT(%r9);             \
2560 1:                                              \
2561         movq    T_COPYOPS(%r9), %rax;           \
2562         cmpq    $0, %rax;                       \
2563         jz      2f;                             \
2564         jmp     *COPYOP(%rax);                  \
2565 2:                                              \
2566         movl    $-1, %eax;                      \
2567         ret;                                    \
2568         SET_SIZE(NAME)
2569         
2570         FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2571         FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2572         FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2573         FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2574 
2575 #elif defined(__i386)
2576 
2577 #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2578         ENTRY(NAME)                             \
2579         movl    %gs:CPU_THREAD, %ecx;           \
2580         movl    kernelbase, %eax;               \
2581         cmpl    %eax, 4(%esp);                  \
2582         jae     1f;                             \
2583         lea     _flt_/**/NAME, %edx;            \
2584         movl    %edx, T_LOFAULT(%ecx);          \
2585         movl    4(%esp), %eax;                  \
2586         movl    8(%esp), %edx;                  \
2587         INSTR   (%eax), REG;                    \
2588         movl    $0, T_LOFAULT(%ecx);            \
2589         INSTR   REG, (%edx);                    \
2590         xorl    %eax, %eax;                     \
2591         ret;                                    \
2592 _flt_/**/NAME:                                  \
2593         movl    $0, T_LOFAULT(%ecx);            \
2594 1:                                              \
2595         movl    T_COPYOPS(%ecx), %eax;          \
2596         cmpl    $0, %eax;                       \
2597         jz      2f;                             \
2598         jmp     *COPYOP(%eax);                  \
2599 2:                                              \
2600         movl    $-1, %eax;                      \
2601         ret;                                    \
2602         SET_SIZE(NAME)
2603 
2604         FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2605         FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2606         FUWORD(fuword8, movb, %al, CP_FUWORD8)
2607 
2608 #endif  /* __i386 */
2609 
2610 #undef  FUWORD
2611 
2612 #endif  /* __lint */
2613 
2614 /*
2615  * Set user word.
2616  */
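/*
 * Illustrative sketch for the SUWORD() expansions below: store a word to
 * user space, returning 0 on success and -1 on a fault.  A hypothetical
 * caller publishing a status word back to a user-supplied address:
 *
 *      if (suword32(ustatus_addr, (uint32_t)status) == -1)
 *              return (EFAULT);
 */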
2617 
2618 #if defined(__lint)
2619 
2620 #if defined(__amd64)
2621 
2622 /* ARGSUSED */
2623 int
2624 suword64(void *addr, uint64_t value)
2625 { return (0); }
2626 
2627 #endif
2628 
2629 /* ARGSUSED */
2630 int
2631 suword32(void *addr, uint32_t value)
2632 { return (0); }
2633 
2634 /* ARGSUSED */
2635 int
2636 suword16(void *addr, uint16_t value)
2637 { return (0); }
2638 
2639 /* ARGSUSED */
2640 int
2641 suword8(void *addr, uint8_t value)
2642 { return (0); }
2643 
2644 #else   /* lint */
2645 
2646 #if defined(__amd64)
2647 
2648 /*
2649  * Note that we don't save and reload the arguments here
2650  * because their values are not altered in the copy path.
2651  */
2652 
2653 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2654         ENTRY(NAME)                             \
2655         movq    %gs:CPU_THREAD, %r9;            \
2656         cmpq    kernelbase(%rip), %rdi;         \
2657         jae     1f;                             \
2658         leaq    _flt_/**/NAME, %rdx;            \
2659         SMAP_DISABLE_INSTR(DISNUM)              \
2660         movq    %rdx, T_LOFAULT(%r9);           \
2661         INSTR   REG, (%rdi);                    \
2662         movq    $0, T_LOFAULT(%r9);             \
2663         xorl    %eax, %eax;                     \
2664         SMAP_ENABLE_INSTR(EN1)                  \
2665         ret;                                    \
2666 _flt_/**/NAME:                                  \
2667         SMAP_ENABLE_INSTR(EN2)                  \
2668         movq    $0, T_LOFAULT(%r9);             \
2669 1:                                              \
2670         movq    T_COPYOPS(%r9), %rax;           \
2671         cmpq    $0, %rax;                       \
2672         jz      3f;                             \
2673         jmp     *COPYOP(%rax);                  \
2674 3:                                              \
2675         movl    $-1, %eax;                      \
2676         ret;                                    \
2677         SET_SIZE(NAME)
2678 
2679         SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2680         SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2681         SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2682         SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2683 
2684 #elif defined(__i386)
2685 
2686 #define SUWORD(NAME, INSTR, REG, COPYOP)        \
2687         ENTRY(NAME)                             \
2688         movl    %gs:CPU_THREAD, %ecx;           \
2689         movl    kernelbase, %eax;               \
2690         cmpl    %eax, 4(%esp);                  \
2691         jae     1f;                             \
2692         lea     _flt_/**/NAME, %edx;            \
2693         movl    %edx, T_LOFAULT(%ecx);          \
2694         movl    4(%esp), %eax;                  \
2695         movl    8(%esp), %edx;                  \
2696         INSTR   REG, (%eax);                    \
2697         movl    $0, T_LOFAULT(%ecx);            \
2698         xorl    %eax, %eax;                     \
2699         ret;                                    \
2700 _flt_/**/NAME:                                  \
2701         movl    $0, T_LOFAULT(%ecx);            \
2702 1:                                              \
2703         movl    T_COPYOPS(%ecx), %eax;          \
2704         cmpl    $0, %eax;                       \
2705         jz      3f;                             \
2706         movl    COPYOP(%eax), %ecx;             \
2707         jmp     *%ecx;                          \
2708 3:                                              \
2709         movl    $-1, %eax;                      \
2710         ret;                                    \
2711         SET_SIZE(NAME)
2712 
2713         SUWORD(suword32, movl, %edx, CP_SUWORD32)
2714         SUWORD(suword16, movw, %dx, CP_SUWORD16)
2715         SUWORD(suword8, movb, %dl, CP_SUWORD8)
2716 
2717 #endif  /* __i386 */
2718 
2719 #undef  SUWORD
2720 
2721 #endif  /* __lint */
2722 
2723 #if defined(__lint)
2724 
2725 #if defined(__amd64)
2726 
2727 /*ARGSUSED*/
2728 void
2729 fuword64_noerr(const void *addr, uint64_t *dst)
2730 {}
2731 
2732 #endif
2733 
2734 /*ARGSUSED*/
2735 void
2736 fuword32_noerr(const void *addr, uint32_t *dst)
2737 {}
2738 
2739 /*ARGSUSED*/
2740 void
2741 fuword8_noerr(const void *addr, uint8_t *dst)
2742 {}
2743 
2744 /*ARGSUSED*/
2745 void
2746 fuword16_noerr(const void *addr, uint16_t *dst)
2747 {}
2748 
2749 #else   /* __lint */
2750 
2751 #if defined(__amd64)
2752 
2753 #define FUWORD_NOERR(NAME, INSTR, REG)          \
2754         ENTRY(NAME)                             \
2755         cmpq    kernelbase(%rip), %rdi;         \
2756         cmovnbq kernelbase(%rip), %rdi;         \
2757         INSTR   (%rdi), REG;                    \
2758         INSTR   REG, (%rsi);                    \
2759         ret;                                    \
2760         SET_SIZE(NAME)
2761 
2762         FUWORD_NOERR(fuword64_noerr, movq, %rax)
2763         FUWORD_NOERR(fuword32_noerr, movl, %eax)
2764         FUWORD_NOERR(fuword16_noerr, movw, %ax)
2765         FUWORD_NOERR(fuword8_noerr, movb, %al)
2766 
2767 #elif defined(__i386)
2768 
2769 #define FUWORD_NOERR(NAME, INSTR, REG)          \
2770         ENTRY(NAME)                             \
2771         movl    4(%esp), %eax;                  \
2772         cmpl    kernelbase, %eax;               \
2773         jb      1f;                             \
2774         movl    kernelbase, %eax;               \
2775 1:      movl    8(%esp), %edx;                  \
2776         INSTR   (%eax), REG;                    \
2777         INSTR   REG, (%edx);                    \
2778         ret;                                    \
2779         SET_SIZE(NAME)
2780 
2781         FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2782         FUWORD_NOERR(fuword16_noerr, movw, %cx)
2783         FUWORD_NOERR(fuword8_noerr, movb, %cl)
2784 
2785 #endif  /* __i386 */
2786 
2787 #undef  FUWORD_NOERR
2788 
2789 #endif  /* __lint */
2790 
2791 #if defined(__lint)
2792 
2793 #if defined(__amd64)
2794 
2795 /*ARGSUSED*/
2796 void
2797 suword64_noerr(void *addr, uint64_t value)
2798 {}
2799 
2800 #endif
2801 
2802 /*ARGSUSED*/
2803 void
2804 suword32_noerr(void *addr, uint32_t value)
2805 {}
2806 
2807 /*ARGSUSED*/
2808 void
2809 suword16_noerr(void *addr, uint16_t value)
2810 {}
2811 
2812 /*ARGSUSED*/
2813 void
2814 suword8_noerr(void *addr, uint8_t value)
2815 {}
2816 
2817 #else   /* lint */
2818 
2819 #if defined(__amd64)
2820 
2821 #define SUWORD_NOERR(NAME, INSTR, REG)          \
2822         ENTRY(NAME)                             \
2823         cmpq    kernelbase(%rip), %rdi;         \
2824         cmovnbq kernelbase(%rip), %rdi;         \
2825         INSTR   REG, (%rdi);                    \
2826         ret;                                    \
2827         SET_SIZE(NAME)
2828 
2829         SUWORD_NOERR(suword64_noerr, movq, %rsi)
2830         SUWORD_NOERR(suword32_noerr, movl, %esi)
2831         SUWORD_NOERR(suword16_noerr, movw, %si)
2832         SUWORD_NOERR(suword8_noerr, movb, %sil)
2833 
2834 #elif defined(__i386)
2835 
2836 #define SUWORD_NOERR(NAME, INSTR, REG)          \
2837         ENTRY(NAME)                             \
2838         movl    4(%esp), %eax;                  \
2839         cmpl    kernelbase, %eax;               \
2840         jb      1f;                             \
2841         movl    kernelbase, %eax;               \
2842 1:                                              \
2843         movl    8(%esp), %edx;                  \
2844         INSTR   REG, (%eax);                    \
2845         ret;                                    \
2846         SET_SIZE(NAME)
2847 
2848         SUWORD_NOERR(suword32_noerr, movl, %edx)
2849         SUWORD_NOERR(suword16_noerr, movw, %dx)
2850         SUWORD_NOERR(suword8_noerr, movb, %dl)
2851 
2852 #endif  /* __i386 */
2853 
2854 #undef  SUWORD_NOERR
2855 
2856 #endif  /* lint */
2857 
2858 
2859 #if defined(__lint)
2860 
2861 /*ARGSUSED*/
2862 int
2863 subyte(void *addr, uchar_t value)
2864 { return (0); }
2865 
2866 /*ARGSUSED*/
2867 void
2868 subyte_noerr(void *addr, uchar_t value)
2869 {}
2870 
2871 /*ARGSUSED*/
2872 int
2873 fulword(const void *addr, ulong_t *valuep)
2874 { return (0); }
2875 
2876 /*ARGSUSED*/
2877 void
2878 fulword_noerr(const void *addr, ulong_t *valuep)
2879 {}
2880 
2881 /*ARGSUSED*/
2882 int
2883 sulword(void *addr, ulong_t valuep)
2884 { return (0); }
2885 
2886 /*ARGSUSED*/
2887 void
2888 sulword_noerr(void *addr, ulong_t valuep)
2889 {}
2890 
2891 #else
2892 
2893         .weak   subyte
2894         subyte=suword8
2895         .weak   subyte_noerr
2896         subyte_noerr=suword8_noerr
2897 
2898 #if defined(__amd64)
2899 
2900         .weak   fulword
2901         fulword=fuword64
2902         .weak   fulword_noerr
2903         fulword_noerr=fuword64_noerr
2904         .weak   sulword
2905         sulword=suword64
2906         .weak   sulword_noerr
2907         sulword_noerr=suword64_noerr
2908 
2909 #elif defined(__i386)
2910 
2911         .weak   fulword
2912         fulword=fuword32
2913         .weak   fulword_noerr
2914         fulword_noerr=fuword32_noerr
2915         .weak   sulword
2916         sulword=suword32
2917         .weak   sulword_noerr
2918         sulword_noerr=suword32_noerr
2919 
2920 #endif /* __i386 */
2921 
2922 #endif /* __lint */
2923 
2924 #if defined(__lint)
2925 
2926 /*
2927  * Copy a block of storage - must not overlap (from + len <= to).
2928  * No fault handler installed (to be called under on_fault())
2929  */
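/*
 * Illustrative sketch (hypothetical caller) of the on_fault()/no_fault()
 * protocol under which the _noerr routines, uzero(), ucopy() and
 * ucopystr() are meant to run, since they install no lofault handler of
 * their own:
 *
 *      label_t ljb;
 *
 *      if (on_fault(&ljb)) {
 *              no_fault();
 *              return (EFAULT);
 *      }
 *      copyin_noerr(ufrom, kto, count);
 *      no_fault();
 */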
2930 
2931 /* ARGSUSED */
2932 void
2933 copyout_noerr(const void *kfrom, void *uto, size_t count)
2934 {}
2935 
2936 /* ARGSUSED */
2937 void
2938 copyin_noerr(const void *ufrom, void *kto, size_t count)
2939 {}
2940 
2941 /*
2942  * Zero a block of storage in user space
2943  */
2944 
2945 /* ARGSUSED */
2946 void
2947 uzero(void *addr, size_t count)
2948 {}
2949 
2950 /*
2951  * copy a block of storage in user space
2952  */
2953 
2954 /* ARGSUSED */
2955 void
2956 ucopy(const void *ufrom, void *uto, size_t ulength)
2957 {}
2958 
2959 /*
2960  * copy a string in user space
2961  */
2962 
2963 /* ARGSUSED */
2964 void
2965 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2966 {}
2967 
2968 #else /* __lint */
2969 
2970 #if defined(__amd64)
2971 
2972         ENTRY(copyin_noerr)
2973         movq    kernelbase(%rip), %rax
2974 #ifdef DEBUG
2975         cmpq    %rax, %rsi              /* %rsi = kto */
2976         jae     1f
2977         leaq    .cpyin_ne_pmsg(%rip), %rdi
2978         jmp     call_panic              /* setup stack and call panic */
2979 1:
2980 #endif
2981         cmpq    %rax, %rdi              /* ufrom < kernelbase */
2982         jb      do_copy
2983         movq    %rax, %rdi              /* force fault at kernelbase */
2984         jmp     do_copy
2985         SET_SIZE(copyin_noerr)
2986 
2987         ENTRY(copyout_noerr)
2988         movq    kernelbase(%rip), %rax
2989 #ifdef DEBUG
2990         cmpq    %rax, %rdi              /* %rdi = kfrom */
2991         jae     1f
2992         leaq    .cpyout_ne_pmsg(%rip), %rdi
2993         jmp     call_panic              /* setup stack and call panic */
2994 1:
2995 #endif
2996         cmpq    %rax, %rsi              /* uto < kernelbase */
2997         jb      do_copy
2998         movq    %rax, %rsi              /* force fault at kernelbase */
2999         jmp     do_copy
3000         SET_SIZE(copyout_noerr)
3001 
3002         ENTRY(uzero)
3003         movq    kernelbase(%rip), %rax
3004         cmpq    %rax, %rdi
3005         jb      do_zero
3006         movq    %rax, %rdi      /* force fault at kernelbase */
3007         jmp     do_zero
3008         SET_SIZE(uzero)
3009 
3010         ENTRY(ucopy)
3011         movq    kernelbase(%rip), %rax
3012         cmpq    %rax, %rdi
3013         cmovaeq %rax, %rdi      /* force fault at kernelbase */
3014         cmpq    %rax, %rsi
3015         cmovaeq %rax, %rsi      /* force fault at kernelbase */
3016         jmp     do_copy
3017         SET_SIZE(ucopy)
3018 
3019         /*
3020          * Note, the frame pointer is required here because do_copystr expects
3021          * to be able to pop it off!
3022          */
3023         ENTRY(ucopystr)
3024         pushq   %rbp
3025         movq    %rsp, %rbp
3026         movq    kernelbase(%rip), %rax
3027         cmpq    %rax, %rdi
3028         cmovaeq %rax, %rdi      /* force fault at kernelbase */
3029         cmpq    %rax, %rsi
3030         cmovaeq %rax, %rsi      /* force fault at kernelbase */
3031         /* do_copystr expects lofault address in %r8 */
3032         /* do_copystr expects the SMAP restore flag in %r10d (0 here) */
3033         xorl    %r10d, %r10d
3034         movq    %gs:CPU_THREAD, %r8
3035         movq    T_LOFAULT(%r8), %r8
3036         jmp     do_copystr
3037         SET_SIZE(ucopystr)
3038 
3039 #elif defined(__i386)
3040 
3041         ENTRY(copyin_noerr)
3042         movl    kernelbase, %eax
3043 #ifdef DEBUG
3044         cmpl    %eax, 8(%esp)
3045         jae     1f
3046         pushl   $.cpyin_ne_pmsg
3047         call    panic
3048 1:
3049 #endif
3050         cmpl    %eax, 4(%esp)
3051         jb      do_copy
3052         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3053         jmp     do_copy
3054         SET_SIZE(copyin_noerr)
3055 
3056         ENTRY(copyout_noerr)
3057         movl    kernelbase, %eax
3058 #ifdef DEBUG
3059         cmpl    %eax, 4(%esp)
3060         jae     1f
3061         pushl   $.cpyout_ne_pmsg
3062         call    panic
3063 1:
3064 #endif
3065         cmpl    %eax, 8(%esp)
3066         jb      do_copy
3067         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3068         jmp     do_copy
3069         SET_SIZE(copyout_noerr)
3070 
3071         ENTRY(uzero)
3072         movl    kernelbase, %eax
3073         cmpl    %eax, 4(%esp)
3074         jb      do_zero
3075         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3076         jmp     do_zero
3077         SET_SIZE(uzero)
3078 
3079         ENTRY(ucopy)
3080         movl    kernelbase, %eax
3081         cmpl    %eax, 4(%esp)
3082         jb      1f
3083         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3084 1:
3085         cmpl    %eax, 8(%esp)
3086         jb      do_copy
3087         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3088         jmp     do_copy
3089         SET_SIZE(ucopy)
3090 
3091         ENTRY(ucopystr)
3092         movl    kernelbase, %eax
3093         cmpl    %eax, 4(%esp)
3094         jb      1f
3095         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3096 1:
3097         cmpl    %eax, 8(%esp)
3098         jb      2f
3099         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3100 2:
3101         /* do_copystr expects the lofault address in %eax */
3102         movl    %gs:CPU_THREAD, %eax
3103         movl    T_LOFAULT(%eax), %eax
3104         jmp     do_copystr
3105         SET_SIZE(ucopystr)
3106 
3107 #endif  /* __i386 */
3108 
3109 #ifdef DEBUG
3110         .data
3111 .kcopy_panic_msg:
3112         .string "kcopy: arguments below kernelbase"
3113 .bcopy_panic_msg:
3114         .string "bcopy: arguments below kernelbase"
3115 .kzero_panic_msg:
3116         .string "kzero: arguments below kernelbase"
3117 .bzero_panic_msg:
3118         .string "bzero: arguments below kernelbase"
3119 .copyin_panic_msg:
3120         .string "copyin: kaddr argument below kernelbase"
3121 .xcopyin_panic_msg:
3122         .string "xcopyin: kaddr argument below kernelbase"
3123 .copyout_panic_msg:
3124         .string "copyout: kaddr argument below kernelbase"
3125 .xcopyout_panic_msg:
3126         .string "xcopyout: kaddr argument below kernelbase"
3127 .copystr_panic_msg:
3128         .string "copystr: arguments in user space"
3129 .copyinstr_panic_msg:
3130         .string "copyinstr: kaddr argument not in kernel address space"
3131 .copyoutstr_panic_msg:
3132         .string "copyoutstr: kaddr argument not in kernel address space"
3133 .cpyin_ne_pmsg:
3134         .string "copyin_noerr: argument not in kernel address space"
3135 .cpyout_ne_pmsg:
3136         .string "copyout_noerr: argument not in kernel address space"
3137 #endif
3138 
3139 #endif  /* __lint */
3140 
3141 #ifndef __lint
3142 
3143 .data
3144 .align  4
3145 .globl  _smap_enable_patch_count
3146 .type   _smap_enable_patch_count,@object
3147 .size   _smap_enable_patch_count, 4
3148 _smap_enable_patch_count:
3149         .long   SMAP_ENABLE_COUNT
3150 
3151 .globl  _smap_disable_patch_count
3152 .type   _smap_disable_patch_count,@object
3153 .size   _smap_disable_patch_count, 4
3154 _smap_disable_patch_count:
3155         .long SMAP_DISABLE_COUNT
3156 
3157 #endif /* __lint */