1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright 2019 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #if defined(__lint)
  46 #include <sys/types.h>
  47 #include <sys/systm.h>
  48 #else   /* __lint */
  49 #include "assym.h"
  50 #endif  /* __lint */
  51 
  52 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  53 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
/*
 * Non-temporal access (NTA) alignment requirement
 */
  57 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  58 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  59 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
  60 #define COUNT_ALIGN_MASK        _CONST(COUNT_ALIGN_SIZE-1)
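
/*
 * For reference, the alignment gate that the non-temporal paths below apply
 * is logically equivalent to the following C sketch (illustrative only, not
 * part of the build; it is not the exact register-level sequence used):
 *
 *	if ((((uintptr_t)src | (uintptr_t)dst) & NTA_ALIGN_MASK) != 0 ||
 *	    (count & COUNT_ALIGN_MASK) != 0)
 *		goto ordinary_copy;	fall back to do_copy_fault
 *	else
 *		goto nta_copy;		do_copy_fault_nta may be used
 */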
  61 
/*
 * With the introduction of Broadwell, Intel has introduced supervisor mode
 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
 * enable access to user pages (AC in rflags, defined as PS_ACHK in
 * <sys/psw.h>). One of the challenges is that many of the userland copy
 * routines are implemented directly on top of the kernel ones. For example,
 * copyin and copyout simply jump to the do_copy_fault label and
 * traditionally let those deal with the return for them. In fact, changing
 * that is a can of frame pointers.
 *
 * Rules and Constraints:
 *
 * 1. For anything that's not in copy.s, we have it do explicit calls to the
 * SMAP related code. It usually is in a position where it is able to. This is
 * restricted to the following three places: DTrace, resume() in swtch.s and
 * on_fault/no_fault. If you want to add it somewhere else, we should be
 * thinking twice.
 *
 * 2. We try to toggle this within the smallest window possible. This means
 * that if we take a fault, need to try to use a copyop in copyin() or
 * copyout(), or any other function, we will always leave with SMAP enabled
 * (the kernel cannot access user pages).
 *
 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They
 * are explicitly only allowed to be called while in an on_fault()/no_fault()
 * handler, which already takes care of ensuring that SMAP is enabled and
 * disabled. Note this means that when under an on_fault()/no_fault() handler,
 * one must not call the non-*_noerr() routines. (See the sketch following
 * this comment.)
 *
 * 4. The first thing we should do after coming out of an lofault handler is
 * to make sure that we call smap_enable() again to ensure that we are safely
 * protected, as more often than not, we will have disabled SMAP to get there.
 *
 * 5. The SMAP functions, smap_enable() and smap_disable(), may not touch any
 * registers beyond those used by the call and ret. These routines may be
 * called from arbitrary contexts in copy.s where we have slightly more
 * special ABIs in place.
 *
 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
 * smap_disable()). If the number of these is changed, you must update the
 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
 *
 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There
 * is no known technical reason preventing it from being enabled.
 *
 * 8. Generally this .s file is processed by a K&R style cpp. This means that
 * it really has a lot of feelings about whitespace. In particular, if you
 * have a macro FOO with the arguments FOO(1, 3), the second argument is in
 * fact ' 3'.
 *
 * 9. The smap_enable() and smap_disable() functions should not generally be
 * called. They exist such that DTrace and on_trap() may use them, that's it.
 *
 * 10. In general, the kernel has its own value for rflags that gets used.
 * This is maintained in a few different places which vary based on how the
 * thread comes into existence and whether it's a user thread. In general,
 * when the kernel takes a trap, it will always set rflags to a known set of
 * flags, mainly as part of ENABLE_INTR_FLAGS, F_OFF and F_ON. These ensure
 * that PS_ACHK is cleared for us. In addition, when using the sysenter
 * instruction, we mask PS_ACHK off via the AMD_SFMASK MSR. See
 * init_cpu_syscall() for where that gets masked off.
 */
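
/*
 * As a concrete illustration of rule 3 above, the *_noerr() routines are
 * only ever used under an existing fault handler, roughly like the following
 * C sketch (illustrative only, not code from this file; it assumes the
 * standard on_fault()/no_fault() and copyin_noerr() kernel interfaces):
 *
 *	label_t	ljb;
 *
 *	if (on_fault(&ljb)) {
 *		no_fault();
 *		return (EFAULT);	the fault path re-enables SMAP for us
 *	}
 *	copyin_noerr(uaddr, kaddr, len);	no SMAP toggling in here
 *	no_fault();
 *	return (0);
 */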
 124 
/*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers. For the best performance
 * at these small sizes, unrolled code is used. For medium sizes, loops that
 * write 64 bytes per iteration are used. The transition points were
 * determined experimentally.
 */
 132 #define BZERO_USE_REP   (1024)
 133 #define BCOPY_DFLT_REP  (128)
 134 #define BCOPY_NHM_REP   (768)
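
/*
 * Putting the comment above together, the bcopy dispatch amounts to the
 * following C sketch (illustrative only; small_unrolled_copy and the other
 * names are hypothetical stand-ins for the code below, and the threshold is
 * BCOPY_DFLT_REP, possibly patched to BCOPY_NHM_REP at boot):
 *
 *	if (count < 0x50)
 *		small_unrolled_copy(from, to, count);
 *	else if (count < threshold)
 *		copy_64_bytes_per_iteration(from, to, count);
 *	else
 *		rep_smovq_then_unrolled_tail(from, to, count);
 */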
 135 
 136 /*
 137  * Copy a block of storage, returning an error code if `from' or
 138  * `to' takes a kernel pagefault which cannot be resolved.
 139  * Returns errno value on pagefault error, 0 if all ok
 140  */
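
/*
 * Typical use from C, as a short sketch (illustrative; kcopy() is declared
 * for kernel code in <sys/systm.h>):
 *
 *	int err;
 *
 *	err = kcopy(src, dst, len);
 *	if (err != 0)
 *		return (err);	the errno from the unresolved pagefault
 */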
 141 
 142 /*
 143  * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 144  * additional call instructions.
 145  */
 146 #if defined(__amd64)
 147 #define SMAP_DISABLE_COUNT      16
 148 #define SMAP_ENABLE_COUNT       26
 149 #elif defined(__i386)
 150 #define SMAP_DISABLE_COUNT      0
 151 #define SMAP_ENABLE_COUNT       0
 152 #endif
 153 
 154 #define SMAP_DISABLE_INSTR(ITER)                \
 155         .globl  _smap_disable_patch_/**/ITER;   \
 156         _smap_disable_patch_/**/ITER/**/:;      \
 157         nop; nop; nop;
 158 
 159 #define SMAP_ENABLE_INSTR(ITER)                 \
 160         .globl  _smap_enable_patch_/**/ITER;    \
 161         _smap_enable_patch_/**/ITER/**/:;       \
 162         nop; nop; nop;
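
/*
 * Each three-nop sequence emitted by the macros above is sized to hold one
 * 3-byte instruction. On SMAP-capable CPUs the labelled locations are
 * expected to be hot-patched at boot (stac at the SMAP_DISABLE_INSTR sites,
 * clac at the SMAP_ENABLE_INSTR sites); on older CPUs they remain nops.
 */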
 163 
 164 #if defined(__lint)
 165 
 166 /* ARGSUSED */
 167 int
 168 kcopy(const void *from, void *to, size_t count)
 169 { return (0); }
 170 
 171 #else   /* __lint */
 172 
 173         .globl  kernelbase
 174         .globl  postbootkernelbase
 175 
 176 #if defined(__amd64)
 177 
 178         ENTRY(kcopy)
 179         pushq   %rbp
 180         movq    %rsp, %rbp
 181 #ifdef DEBUG
 182         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 183         jb      0f
 184         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 185         jnb     1f
 186 0:      leaq    .kcopy_panic_msg(%rip), %rdi
 187         xorl    %eax, %eax
 188         call    panic
 189 1:
 190 #endif
 191         /*
 192          * pass lofault value as 4th argument to do_copy_fault
 193          */
 194         leaq    _kcopy_copyerr(%rip), %rcx
 195         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 196 
 197 do_copy_fault:
 198         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 199         movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 200         call    bcopy_altentry
 201         xorl    %eax, %eax              /* return 0 (success) */
 202         SMAP_ENABLE_INSTR(0)
 203 
 204         /*
 205          * A fault during do_copy_fault is indicated through an errno value
 206          * in %rax and we iretq from the trap handler to here.
 207          */
 208 _kcopy_copyerr:
 209         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 210         leave
 211         ret
 212         SET_SIZE(kcopy)
 213 
 214 #elif defined(__i386)
 215 
 216 #define ARG_FROM        8
 217 #define ARG_TO          12
 218 #define ARG_COUNT       16
 219 
 220         ENTRY(kcopy)
 221 #ifdef DEBUG
 222         pushl   %ebp
 223         movl    %esp, %ebp
 224         movl    postbootkernelbase, %eax
 225         cmpl    %eax, ARG_FROM(%ebp)
 226         jb      0f
 227         cmpl    %eax, ARG_TO(%ebp)
 228         jnb     1f
 229 0:      pushl   $.kcopy_panic_msg
 230         call    panic
 231 1:      popl    %ebp
 232 #endif
 233         lea     _kcopy_copyerr, %eax    /* lofault value */
 234         movl    %gs:CPU_THREAD, %edx
 235 
 236 do_copy_fault:
 237         pushl   %ebp
 238         movl    %esp, %ebp              /* setup stack frame */
 239         pushl   %esi
 240         pushl   %edi                    /* save registers */
 241 
 242         movl    T_LOFAULT(%edx), %edi
 243         pushl   %edi                    /* save the current lofault */
 244         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 245 
 246         movl    ARG_COUNT(%ebp), %ecx
 247         movl    ARG_FROM(%ebp), %esi
 248         movl    ARG_TO(%ebp), %edi
 249         shrl    $2, %ecx                /* word count */
 250         rep
 251           smovl
 252         movl    ARG_COUNT(%ebp), %ecx
 253         andl    $3, %ecx                /* bytes left over */
 254         rep
 255           smovb
 256         xorl    %eax, %eax
 257 
 258         /*
 259          * A fault during do_copy_fault is indicated through an errno value
 260          * in %eax and we iret from the trap handler to here.
 261          */
 262 _kcopy_copyerr:
 263         popl    %ecx
 264         popl    %edi
 265         movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 266         popl    %esi
 267         popl    %ebp
 268         ret
 269         SET_SIZE(kcopy)
 270 
 271 #undef  ARG_FROM
 272 #undef  ARG_TO
 273 #undef  ARG_COUNT
 274 
 275 #endif  /* __i386 */
 276 #endif  /* __lint */
 277 
 278 #if defined(__lint)
 279 
 280 /*
 281  * Copy a block of storage.  Similar to kcopy but uses non-temporal
 282  * instructions.
 283  */
 284 
 285 /* ARGSUSED */
 286 int
 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
 288 { return (0); }
 289 
 290 #else   /* __lint */
 291 
 292 #if defined(__amd64)
 293 
 294 #define COPY_LOOP_INIT(src, dst, cnt)   \
 295         addq    cnt, src;                       \
 296         addq    cnt, dst;                       \
 297         shrq    $3, cnt;                        \
 298         neg     cnt
 299 
 300         /* Copy 16 bytes per loop.  Uses %rax and %r8 */
 301 #define COPY_LOOP_BODY(src, dst, cnt)   \
 302         prefetchnta     0x100(src, cnt, 8);     \
 303         movq    (src, cnt, 8), %rax;            \
 304         movq    0x8(src, cnt, 8), %r8;          \
 305         movnti  %rax, (dst, cnt, 8);            \
 306         movnti  %r8, 0x8(dst, cnt, 8);          \
 307         addq    $2, cnt
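
/*
 * Note on the two macros above: COPY_LOOP_INIT advances src and dst past the
 * end of the buffer and turns cnt into a negative count of 8-byte words, so
 * that (src, cnt, 8) initially addresses the first byte to copy.
 * COPY_LOOP_BODY then moves 16 bytes per pass and adds 2 to cnt; the loop
 * terminates when cnt reaches zero (the jnz following the macro).
 */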
 308 
 309         ENTRY(kcopy_nta)
 310         pushq   %rbp
 311         movq    %rsp, %rbp
 312 #ifdef DEBUG
 313         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 314         jb      0f
 315         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 316         jnb     1f
 317 0:      leaq    .kcopy_panic_msg(%rip), %rdi
 318         xorl    %eax, %eax
 319         call    panic
 320 1:
 321 #endif
 322 
 323         movq    %gs:CPU_THREAD, %r9
 324         cmpq    $0, %rcx                /* No non-temporal access? */
 325         /*
 326          * pass lofault value as 4th argument to do_copy_fault
 327          */
 328         leaq    _kcopy_nta_copyerr(%rip), %rcx  /* doesn't set rflags */
 329         jnz     do_copy_fault           /* use regular access */
 330         /*
 331          * Make sure cnt is >= KCOPY_MIN_SIZE
 332          */
 333         cmpq    $KCOPY_MIN_SIZE, %rdx
 334         jb      do_copy_fault
 335 
 336         /*
 337          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
 338          * count is COUNT_ALIGN_SIZE aligned.
 339          */
 340         movq    %rdi, %r10
 341         orq     %rsi, %r10
 342         andq    $NTA_ALIGN_MASK, %r10
 343         orq     %rdx, %r10
 344         andq    $COUNT_ALIGN_MASK, %r10
 345         jnz     do_copy_fault
 346 
 347         ALTENTRY(do_copy_fault_nta)
 348         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 349         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 350         movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 351 
 352         /*
 353          * COPY_LOOP_BODY uses %rax and %r8
 354          */
 355         COPY_LOOP_INIT(%rdi, %rsi, %rdx)
 356 2:      COPY_LOOP_BODY(%rdi, %rsi, %rdx)
 357         jnz     2b
 358 
 359         mfence
 360         xorl    %eax, %eax              /* return 0 (success) */
 361         SMAP_ENABLE_INSTR(1)
 362 
 363 _kcopy_nta_copyerr:
 364         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 365         leave
 366         ret
 367         SET_SIZE(do_copy_fault_nta)
 368         SET_SIZE(kcopy_nta)
 369 
 370 #elif defined(__i386)
 371 
 372 #define ARG_FROM        8
 373 #define ARG_TO          12
 374 #define ARG_COUNT       16
 375 
 376 #define COPY_LOOP_INIT(src, dst, cnt)   \
 377         addl    cnt, src;                       \
 378         addl    cnt, dst;                       \
 379         shrl    $3, cnt;                        \
 380         neg     cnt
 381 
 382 #define COPY_LOOP_BODY(src, dst, cnt)   \
 383         prefetchnta     0x100(src, cnt, 8);     \
 384         movl    (src, cnt, 8), %esi;            \
 385         movnti  %esi, (dst, cnt, 8);            \
 386         movl    0x4(src, cnt, 8), %esi;         \
 387         movnti  %esi, 0x4(dst, cnt, 8);         \
 388         movl    0x8(src, cnt, 8), %esi;         \
 389         movnti  %esi, 0x8(dst, cnt, 8);         \
 390         movl    0xc(src, cnt, 8), %esi;         \
 391         movnti  %esi, 0xc(dst, cnt, 8);         \
 392         addl    $2, cnt
 393 
	/*
	 * kcopy_nta is not implemented for 32-bit as no performance
	 * improvement was shown.  We jump directly to kcopy; the fourth
	 * (copy_cached) argument is simply ignored.
	 */
 399         ENTRY(kcopy_nta)
 400         jmp     kcopy
 401 
 402         lea     _kcopy_nta_copyerr, %eax        /* lofault value */
 403         ALTENTRY(do_copy_fault_nta)
 404         pushl   %ebp
 405         movl    %esp, %ebp              /* setup stack frame */
 406         pushl   %esi
 407         pushl   %edi
 408 
 409         movl    %gs:CPU_THREAD, %edx
 410         movl    T_LOFAULT(%edx), %edi
 411         pushl   %edi                    /* save the current lofault */
 412         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 413 
 414         /* COPY_LOOP_BODY needs to use %esi */
 415         movl    ARG_COUNT(%ebp), %ecx
 416         movl    ARG_FROM(%ebp), %edi
 417         movl    ARG_TO(%ebp), %eax
 418         COPY_LOOP_INIT(%edi, %eax, %ecx)
 419 1:      COPY_LOOP_BODY(%edi, %eax, %ecx)
 420         jnz     1b
 421         mfence
 422 
 423         xorl    %eax, %eax
 424 _kcopy_nta_copyerr:
 425         popl    %ecx
 426         popl    %edi
 427         movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 428         popl    %esi
 429         leave
 430         ret
 431         SET_SIZE(do_copy_fault_nta)
 432         SET_SIZE(kcopy_nta)
 433 
 434 #undef  ARG_FROM
 435 #undef  ARG_TO
 436 #undef  ARG_COUNT
 437 
 438 #endif  /* __i386 */
 439 #endif  /* __lint */
 440 
 441 #if defined(__lint)
 442 
 443 /* ARGSUSED */
 444 void
 445 bcopy(const void *from, void *to, size_t count)
 446 {}
 447 
 448 #else   /* __lint */
 449 
 450 #if defined(__amd64)
 451 
 452         ENTRY(bcopy)
 453 #ifdef DEBUG
 454         orq     %rdx, %rdx              /* %rdx = count */
 455         jz      1f
 456         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 457         jb      0f
 458         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 459         jnb     1f
 460 0:      leaq    .bcopy_panic_msg(%rip), %rdi
 461         jmp     call_panic              /* setup stack and call panic */
 462 1:
 463 #endif
 464         /*
 465          * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466          * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467          * uses these registers in future they must be saved and restored.
 468          */
 469         ALTENTRY(bcopy_altentry)
 470 do_copy:
 471 #define L(s) .bcopy/**/s
 472         cmpq    $0x50, %rdx             /* 80 */
 473         jae     bcopy_ck_size
 474 
	/*
	 * Performance data shows that many callers copy small buffers. So for
	 * the best performance at these sizes, unrolled code is used. Store
	 * data without worrying about alignment.
	 */
 480         leaq    L(fwdPxQx)(%rip), %r10
 481         addq    %rdx, %rdi
 482         addq    %rdx, %rsi
 483         movslq  (%r10,%rdx,4), %rcx
 484         leaq    (%rcx,%r10,1), %r10
 485         INDIRECT_JMP_REG(r10)
 486 
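/*
 * Each entry in the table below is the 32-bit offset of the handler for that
 * byte count, relative to L(fwdPxQx). The movslq/leaq pair above rebuilds
 * the absolute address, and since %rdi/%rsi have already been advanced by
 * the count, every handler copies using negative offsets and falls through
 * to the next smaller case.
 */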
 487         .p2align 4
 488 L(fwdPxQx):
 489         .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490         .int       L(P1Q0)-L(fwdPxQx)
 491         .int       L(P2Q0)-L(fwdPxQx)
 492         .int       L(P3Q0)-L(fwdPxQx)
 493         .int       L(P4Q0)-L(fwdPxQx)
 494         .int       L(P5Q0)-L(fwdPxQx)
 495         .int       L(P6Q0)-L(fwdPxQx)
 496         .int       L(P7Q0)-L(fwdPxQx)
 497 
 498         .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499         .int       L(P1Q1)-L(fwdPxQx)
 500         .int       L(P2Q1)-L(fwdPxQx)
 501         .int       L(P3Q1)-L(fwdPxQx)
 502         .int       L(P4Q1)-L(fwdPxQx)
 503         .int       L(P5Q1)-L(fwdPxQx)
 504         .int       L(P6Q1)-L(fwdPxQx)
 505         .int       L(P7Q1)-L(fwdPxQx)
 506 
 507         .int       L(P0Q2)-L(fwdPxQx)   /* 16 */
 508         .int       L(P1Q2)-L(fwdPxQx)
 509         .int       L(P2Q2)-L(fwdPxQx)
 510         .int       L(P3Q2)-L(fwdPxQx)
 511         .int       L(P4Q2)-L(fwdPxQx)
 512         .int       L(P5Q2)-L(fwdPxQx)
 513         .int       L(P6Q2)-L(fwdPxQx)
 514         .int       L(P7Q2)-L(fwdPxQx)
 515 
 516         .int       L(P0Q3)-L(fwdPxQx)   /* 24 */
 517         .int       L(P1Q3)-L(fwdPxQx)
 518         .int       L(P2Q3)-L(fwdPxQx)
 519         .int       L(P3Q3)-L(fwdPxQx)
 520         .int       L(P4Q3)-L(fwdPxQx)
 521         .int       L(P5Q3)-L(fwdPxQx)
 522         .int       L(P6Q3)-L(fwdPxQx)
 523         .int       L(P7Q3)-L(fwdPxQx)
 524 
 525         .int       L(P0Q4)-L(fwdPxQx)   /* 32 */
 526         .int       L(P1Q4)-L(fwdPxQx)
 527         .int       L(P2Q4)-L(fwdPxQx)
 528         .int       L(P3Q4)-L(fwdPxQx)
 529         .int       L(P4Q4)-L(fwdPxQx)
 530         .int       L(P5Q4)-L(fwdPxQx)
 531         .int       L(P6Q4)-L(fwdPxQx)
 532         .int       L(P7Q4)-L(fwdPxQx)
 533 
 534         .int       L(P0Q5)-L(fwdPxQx)   /* 40 */
 535         .int       L(P1Q5)-L(fwdPxQx)
 536         .int       L(P2Q5)-L(fwdPxQx)
 537         .int       L(P3Q5)-L(fwdPxQx)
 538         .int       L(P4Q5)-L(fwdPxQx)
 539         .int       L(P5Q5)-L(fwdPxQx)
 540         .int       L(P6Q5)-L(fwdPxQx)
 541         .int       L(P7Q5)-L(fwdPxQx)
 542 
 543         .int       L(P0Q6)-L(fwdPxQx)   /* 48 */
 544         .int       L(P1Q6)-L(fwdPxQx)
 545         .int       L(P2Q6)-L(fwdPxQx)
 546         .int       L(P3Q6)-L(fwdPxQx)
 547         .int       L(P4Q6)-L(fwdPxQx)
 548         .int       L(P5Q6)-L(fwdPxQx)
 549         .int       L(P6Q6)-L(fwdPxQx)
 550         .int       L(P7Q6)-L(fwdPxQx)
 551 
 552         .int       L(P0Q7)-L(fwdPxQx)   /* 56 */
 553         .int       L(P1Q7)-L(fwdPxQx)
 554         .int       L(P2Q7)-L(fwdPxQx)
 555         .int       L(P3Q7)-L(fwdPxQx)
 556         .int       L(P4Q7)-L(fwdPxQx)
 557         .int       L(P5Q7)-L(fwdPxQx)
 558         .int       L(P6Q7)-L(fwdPxQx)
 559         .int       L(P7Q7)-L(fwdPxQx)
 560 
 561         .int       L(P0Q8)-L(fwdPxQx)   /* 64 */
 562         .int       L(P1Q8)-L(fwdPxQx)
 563         .int       L(P2Q8)-L(fwdPxQx)
 564         .int       L(P3Q8)-L(fwdPxQx)
 565         .int       L(P4Q8)-L(fwdPxQx)
 566         .int       L(P5Q8)-L(fwdPxQx)
 567         .int       L(P6Q8)-L(fwdPxQx)
 568         .int       L(P7Q8)-L(fwdPxQx)
 569 
 570         .int       L(P0Q9)-L(fwdPxQx)   /* 72 */
 571         .int       L(P1Q9)-L(fwdPxQx)
 572         .int       L(P2Q9)-L(fwdPxQx)
 573         .int       L(P3Q9)-L(fwdPxQx)
 574         .int       L(P4Q9)-L(fwdPxQx)
 575         .int       L(P5Q9)-L(fwdPxQx)
 576         .int       L(P6Q9)-L(fwdPxQx)
 577         .int       L(P7Q9)-L(fwdPxQx)   /* 79 */
 578 
 579         .p2align 4
 580 L(P0Q9):
 581         mov    -0x48(%rdi), %rcx
 582         mov    %rcx, -0x48(%rsi)
 583 L(P0Q8):
 584         mov    -0x40(%rdi), %r10
 585         mov    %r10, -0x40(%rsi)
 586 L(P0Q7):
 587         mov    -0x38(%rdi), %r8
 588         mov    %r8, -0x38(%rsi)
 589 L(P0Q6):
 590         mov    -0x30(%rdi), %rcx
 591         mov    %rcx, -0x30(%rsi)
 592 L(P0Q5):
 593         mov    -0x28(%rdi), %r10
 594         mov    %r10, -0x28(%rsi)
 595 L(P0Q4):
 596         mov    -0x20(%rdi), %r8
 597         mov    %r8, -0x20(%rsi)
 598 L(P0Q3):
 599         mov    -0x18(%rdi), %rcx
 600         mov    %rcx, -0x18(%rsi)
 601 L(P0Q2):
 602         mov    -0x10(%rdi), %r10
 603         mov    %r10, -0x10(%rsi)
 604 L(P0Q1):
 605         mov    -0x8(%rdi), %r8
 606         mov    %r8, -0x8(%rsi)
 607 L(P0Q0):
 608         ret
 609 
 610         .p2align 4
 611 L(P1Q9):
 612         mov    -0x49(%rdi), %r8
 613         mov    %r8, -0x49(%rsi)
 614 L(P1Q8):
 615         mov    -0x41(%rdi), %rcx
 616         mov    %rcx, -0x41(%rsi)
 617 L(P1Q7):
 618         mov    -0x39(%rdi), %r10
 619         mov    %r10, -0x39(%rsi)
 620 L(P1Q6):
 621         mov    -0x31(%rdi), %r8
 622         mov    %r8, -0x31(%rsi)
 623 L(P1Q5):
 624         mov    -0x29(%rdi), %rcx
 625         mov    %rcx, -0x29(%rsi)
 626 L(P1Q4):
 627         mov    -0x21(%rdi), %r10
 628         mov    %r10, -0x21(%rsi)
 629 L(P1Q3):
 630         mov    -0x19(%rdi), %r8
 631         mov    %r8, -0x19(%rsi)
 632 L(P1Q2):
 633         mov    -0x11(%rdi), %rcx
 634         mov    %rcx, -0x11(%rsi)
 635 L(P1Q1):
 636         mov    -0x9(%rdi), %r10
 637         mov    %r10, -0x9(%rsi)
 638 L(P1Q0):
 639         movzbq -0x1(%rdi), %r8
 640         mov    %r8b, -0x1(%rsi)
 641         ret
 642 
 643         .p2align 4
 644 L(P2Q9):
 645         mov    -0x4a(%rdi), %r8
 646         mov    %r8, -0x4a(%rsi)
 647 L(P2Q8):
 648         mov    -0x42(%rdi), %rcx
 649         mov    %rcx, -0x42(%rsi)
 650 L(P2Q7):
 651         mov    -0x3a(%rdi), %r10
 652         mov    %r10, -0x3a(%rsi)
 653 L(P2Q6):
 654         mov    -0x32(%rdi), %r8
 655         mov    %r8, -0x32(%rsi)
 656 L(P2Q5):
 657         mov    -0x2a(%rdi), %rcx
 658         mov    %rcx, -0x2a(%rsi)
 659 L(P2Q4):
 660         mov    -0x22(%rdi), %r10
 661         mov    %r10, -0x22(%rsi)
 662 L(P2Q3):
 663         mov    -0x1a(%rdi), %r8
 664         mov    %r8, -0x1a(%rsi)
 665 L(P2Q2):
 666         mov    -0x12(%rdi), %rcx
 667         mov    %rcx, -0x12(%rsi)
 668 L(P2Q1):
 669         mov    -0xa(%rdi), %r10
 670         mov    %r10, -0xa(%rsi)
 671 L(P2Q0):
 672         movzwq -0x2(%rdi), %r8
 673         mov    %r8w, -0x2(%rsi)
 674         ret
 675 
 676         .p2align 4
 677 L(P3Q9):
 678         mov    -0x4b(%rdi), %r8
 679         mov    %r8, -0x4b(%rsi)
 680 L(P3Q8):
 681         mov    -0x43(%rdi), %rcx
 682         mov    %rcx, -0x43(%rsi)
 683 L(P3Q7):
 684         mov    -0x3b(%rdi), %r10
 685         mov    %r10, -0x3b(%rsi)
 686 L(P3Q6):
 687         mov    -0x33(%rdi), %r8
 688         mov    %r8, -0x33(%rsi)
 689 L(P3Q5):
 690         mov    -0x2b(%rdi), %rcx
 691         mov    %rcx, -0x2b(%rsi)
 692 L(P3Q4):
 693         mov    -0x23(%rdi), %r10
 694         mov    %r10, -0x23(%rsi)
 695 L(P3Q3):
 696         mov    -0x1b(%rdi), %r8
 697         mov    %r8, -0x1b(%rsi)
 698 L(P3Q2):
 699         mov    -0x13(%rdi), %rcx
 700         mov    %rcx, -0x13(%rsi)
 701 L(P3Q1):
 702         mov    -0xb(%rdi), %r10
 703         mov    %r10, -0xb(%rsi)
 704         /*
 705          * These trailing loads/stores have to do all their loads 1st,
 706          * then do the stores.
 707          */
 708 L(P3Q0):
 709         movzwq -0x3(%rdi), %r8
 710         movzbq -0x1(%rdi), %r10
 711         mov    %r8w, -0x3(%rsi)
 712         mov    %r10b, -0x1(%rsi)
 713         ret
 714 
 715         .p2align 4
 716 L(P4Q9):
 717         mov    -0x4c(%rdi), %r8
 718         mov    %r8, -0x4c(%rsi)
 719 L(P4Q8):
 720         mov    -0x44(%rdi), %rcx
 721         mov    %rcx, -0x44(%rsi)
 722 L(P4Q7):
 723         mov    -0x3c(%rdi), %r10
 724         mov    %r10, -0x3c(%rsi)
 725 L(P4Q6):
 726         mov    -0x34(%rdi), %r8
 727         mov    %r8, -0x34(%rsi)
 728 L(P4Q5):
 729         mov    -0x2c(%rdi), %rcx
 730         mov    %rcx, -0x2c(%rsi)
 731 L(P4Q4):
 732         mov    -0x24(%rdi), %r10
 733         mov    %r10, -0x24(%rsi)
 734 L(P4Q3):
 735         mov    -0x1c(%rdi), %r8
 736         mov    %r8, -0x1c(%rsi)
 737 L(P4Q2):
 738         mov    -0x14(%rdi), %rcx
 739         mov    %rcx, -0x14(%rsi)
 740 L(P4Q1):
 741         mov    -0xc(%rdi), %r10
 742         mov    %r10, -0xc(%rsi)
 743 L(P4Q0):
 744         mov    -0x4(%rdi), %r8d
 745         mov    %r8d, -0x4(%rsi)
 746         ret
 747 
 748         .p2align 4
 749 L(P5Q9):
 750         mov    -0x4d(%rdi), %r8
 751         mov    %r8, -0x4d(%rsi)
 752 L(P5Q8):
 753         mov    -0x45(%rdi), %rcx
 754         mov    %rcx, -0x45(%rsi)
 755 L(P5Q7):
 756         mov    -0x3d(%rdi), %r10
 757         mov    %r10, -0x3d(%rsi)
 758 L(P5Q6):
 759         mov    -0x35(%rdi), %r8
 760         mov    %r8, -0x35(%rsi)
 761 L(P5Q5):
 762         mov    -0x2d(%rdi), %rcx
 763         mov    %rcx, -0x2d(%rsi)
 764 L(P5Q4):
 765         mov    -0x25(%rdi), %r10
 766         mov    %r10, -0x25(%rsi)
 767 L(P5Q3):
 768         mov    -0x1d(%rdi), %r8
 769         mov    %r8, -0x1d(%rsi)
 770 L(P5Q2):
 771         mov    -0x15(%rdi), %rcx
 772         mov    %rcx, -0x15(%rsi)
 773 L(P5Q1):
 774         mov    -0xd(%rdi), %r10
 775         mov    %r10, -0xd(%rsi)
 776 L(P5Q0):
 777         mov    -0x5(%rdi), %r8d
 778         movzbq -0x1(%rdi), %r10
 779         mov    %r8d, -0x5(%rsi)
 780         mov    %r10b, -0x1(%rsi)
 781         ret
 782 
 783         .p2align 4
 784 L(P6Q9):
 785         mov    -0x4e(%rdi), %r8
 786         mov    %r8, -0x4e(%rsi)
 787 L(P6Q8):
 788         mov    -0x46(%rdi), %rcx
 789         mov    %rcx, -0x46(%rsi)
 790 L(P6Q7):
 791         mov    -0x3e(%rdi), %r10
 792         mov    %r10, -0x3e(%rsi)
 793 L(P6Q6):
 794         mov    -0x36(%rdi), %r8
 795         mov    %r8, -0x36(%rsi)
 796 L(P6Q5):
 797         mov    -0x2e(%rdi), %rcx
 798         mov    %rcx, -0x2e(%rsi)
 799 L(P6Q4):
 800         mov    -0x26(%rdi), %r10
 801         mov    %r10, -0x26(%rsi)
 802 L(P6Q3):
 803         mov    -0x1e(%rdi), %r8
 804         mov    %r8, -0x1e(%rsi)
 805 L(P6Q2):
 806         mov    -0x16(%rdi), %rcx
 807         mov    %rcx, -0x16(%rsi)
 808 L(P6Q1):
 809         mov    -0xe(%rdi), %r10
 810         mov    %r10, -0xe(%rsi)
 811 L(P6Q0):
 812         mov    -0x6(%rdi), %r8d
 813         movzwq -0x2(%rdi), %r10
 814         mov    %r8d, -0x6(%rsi)
 815         mov    %r10w, -0x2(%rsi)
 816         ret
 817 
 818         .p2align 4
 819 L(P7Q9):
 820         mov    -0x4f(%rdi), %r8
 821         mov    %r8, -0x4f(%rsi)
 822 L(P7Q8):
 823         mov    -0x47(%rdi), %rcx
 824         mov    %rcx, -0x47(%rsi)
 825 L(P7Q7):
 826         mov    -0x3f(%rdi), %r10
 827         mov    %r10, -0x3f(%rsi)
 828 L(P7Q6):
 829         mov    -0x37(%rdi), %r8
 830         mov    %r8, -0x37(%rsi)
 831 L(P7Q5):
 832         mov    -0x2f(%rdi), %rcx
 833         mov    %rcx, -0x2f(%rsi)
 834 L(P7Q4):
 835         mov    -0x27(%rdi), %r10
 836         mov    %r10, -0x27(%rsi)
 837 L(P7Q3):
 838         mov    -0x1f(%rdi), %r8
 839         mov    %r8, -0x1f(%rsi)
 840 L(P7Q2):
 841         mov    -0x17(%rdi), %rcx
 842         mov    %rcx, -0x17(%rsi)
 843 L(P7Q1):
 844         mov    -0xf(%rdi), %r10
 845         mov    %r10, -0xf(%rsi)
 846 L(P7Q0):
 847         mov    -0x7(%rdi), %r8d
 848         movzwq -0x3(%rdi), %r10
 849         movzbq -0x1(%rdi), %rcx
 850         mov    %r8d, -0x7(%rsi)
 851         mov    %r10w, -0x3(%rsi)
 852         mov    %cl, -0x1(%rsi)
 853         ret
 854 
	/*
	 * For large sizes rep smovq is fastest.
	 * The transition point was determined experimentally as measured on
	 * Intel Xeon processors (incl. Nehalem and previous generations) and
	 * AMD Opteron. The transition value is patched into the immediate at
	 * boot time to avoid a memory reference for the threshold.
	 */
 862         .globl bcopy_patch_start
 863 bcopy_patch_start:
 864         cmpq    $BCOPY_NHM_REP, %rdx
 865         .globl bcopy_patch_end
 866 bcopy_patch_end:
 867 
 868         .p2align 4
 869         ALTENTRY(bcopy_ck_size)
 870 
 871         cmpq    $BCOPY_DFLT_REP, %rdx
 872         jae     L(use_rep)
 873 
	/*
	 * Align to an 8-byte boundary. Avoids penalties from unaligned stores
	 * as well as from stores spanning cachelines.
	 */
 878         test    $0x7, %rsi
 879         jz      L(aligned_loop)
 880         test    $0x1, %rsi
 881         jz      2f
 882         movzbq  (%rdi), %r8
 883         dec     %rdx
 884         inc     %rdi
 885         mov     %r8b, (%rsi)
 886         inc     %rsi
 887 2:
 888         test    $0x2, %rsi
 889         jz      4f
 890         movzwq  (%rdi), %r8
 891         sub     $0x2, %rdx
 892         add     $0x2, %rdi
 893         mov     %r8w, (%rsi)
 894         add     $0x2, %rsi
 895 4:
 896         test    $0x4, %rsi
 897         jz      L(aligned_loop)
 898         mov     (%rdi), %r8d
 899         sub     $0x4, %rdx
 900         add     $0x4, %rdi
 901         mov     %r8d, (%rsi)
 902         add     $0x4, %rsi
 903 
	/*
	 * Copy 64 bytes per loop iteration
	 */
 907         .p2align 4
 908 L(aligned_loop):
 909         mov     (%rdi), %r8
 910         mov     0x8(%rdi), %r10
 911         lea     -0x40(%rdx), %rdx
 912         mov     %r8, (%rsi)
 913         mov     %r10, 0x8(%rsi)
 914         mov     0x10(%rdi), %rcx
 915         mov     0x18(%rdi), %r8
 916         mov     %rcx, 0x10(%rsi)
 917         mov     %r8, 0x18(%rsi)
 918 
 919         cmp     $0x40, %rdx
 920         mov     0x20(%rdi), %r10
 921         mov     0x28(%rdi), %rcx
 922         mov     %r10, 0x20(%rsi)
 923         mov     %rcx, 0x28(%rsi)
 924         mov     0x30(%rdi), %r8
 925         mov     0x38(%rdi), %r10
 926         lea     0x40(%rdi), %rdi
 927         mov     %r8, 0x30(%rsi)
 928         mov     %r10, 0x38(%rsi)
 929         lea     0x40(%rsi), %rsi
 930         jae     L(aligned_loop)
 931 
 932         /*
 933          * Copy remaining bytes (0-63)
 934          */
 935 L(do_remainder):
 936         leaq    L(fwdPxQx)(%rip), %r10
 937         addq    %rdx, %rdi
 938         addq    %rdx, %rsi
 939         movslq  (%r10,%rdx,4), %rcx
 940         leaq    (%rcx,%r10,1), %r10
 941         INDIRECT_JMP_REG(r10)
 942 
	/*
	 * Use rep smovq. Copy any remainder via unrolled code
	 */
 946         .p2align 4
 947 L(use_rep):
 948         xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949         movq    %rdx, %rcx              /* %rcx = count */
 950         shrq    $3, %rcx                /* 8-byte word count */
 951         rep
 952           smovq
 953 
 954         xchgq   %rsi, %rdi              /* %rdi = src, %rsi = destination */
 955         andq    $7, %rdx                /* remainder */
 956         jnz     L(do_remainder)
 957         ret
 958 #undef  L
 959         SET_SIZE(bcopy_ck_size)
 960 
 961 #ifdef DEBUG
	/*
	 * Set up a frame on the run-time stack. The end of the input argument
	 * area must be aligned on a 16 byte boundary. The stack pointer %rsp
	 * always points to the end of the latest allocated stack frame.
	 * panic(const char *format, ...) is a varargs function. When a
	 * function taking variable arguments is called, %al must indicate
	 * the number of floating point parameters passed to the function in
	 * SSE registers, which is why %eax is zeroed below.
	 */
 971 call_panic:
 972         pushq   %rbp                    /* align stack properly */
 973         movq    %rsp, %rbp
 974         xorl    %eax, %eax              /* no variable arguments */
 975         call    panic                   /* %rdi = format string */
 976 #endif
 977         SET_SIZE(bcopy_altentry)
 978         SET_SIZE(bcopy)
 979 
 980 #elif defined(__i386)
 981 
 982 #define ARG_FROM        4
 983 #define ARG_TO          8
 984 #define ARG_COUNT       12
 985 
 986         ENTRY(bcopy)
 987 #ifdef DEBUG
 988         movl    ARG_COUNT(%esp), %eax
 989         orl     %eax, %eax
 990         jz      1f
 991         movl    postbootkernelbase, %eax
 992         cmpl    %eax, ARG_FROM(%esp)
 993         jb      0f
 994         cmpl    %eax, ARG_TO(%esp)
 995         jnb     1f
 996 0:      pushl   %ebp
 997         movl    %esp, %ebp
 998         pushl   $.bcopy_panic_msg
 999         call    panic
1000 1:
1001 #endif
1002 do_copy:
1003         movl    %esi, %eax              /* save registers */
1004         movl    %edi, %edx
1005         movl    ARG_COUNT(%esp), %ecx
1006         movl    ARG_FROM(%esp), %esi
1007         movl    ARG_TO(%esp), %edi
1008 
1009         shrl    $2, %ecx                /* word count */
1010         rep
1011           smovl
1012         movl    ARG_COUNT(%esp), %ecx
1013         andl    $3, %ecx                /* bytes left over */
1014         rep
1015           smovb
1016         movl    %eax, %esi              /* restore registers */
1017         movl    %edx, %edi
1018         ret
1019         SET_SIZE(bcopy)
1020 
1021 #undef  ARG_COUNT
1022 #undef  ARG_FROM
1023 #undef  ARG_TO
1024 
1025 #endif  /* __i386 */
1026 #endif  /* __lint */
1027 
1028 
1029 /*
1030  * Zero a block of storage, returning an error code if we
1031  * take a kernel pagefault which cannot be resolved.
1032  * Returns errno value on pagefault error, 0 if all ok
1033  */
1034 
1035 #if defined(__lint)
1036 
1037 /* ARGSUSED */
1038 int
1039 kzero(void *addr, size_t count)
1040 { return (0); }
1041 
1042 #else   /* __lint */
1043 
1044 #if defined(__amd64)
1045 
1046         ENTRY(kzero)
1047 #ifdef DEBUG
1048         cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1049         jnb     0f
1050         leaq    .kzero_panic_msg(%rip), %rdi
1051         jmp     call_panic              /* setup stack and call panic */
1052 0:
1053 #endif
1054         /*
1055          * pass lofault value as 3rd argument for fault return
1056          */
1057         leaq    _kzeroerr(%rip), %rdx
1058 
1059         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
1060         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
1061         movq    %rdx, T_LOFAULT(%r9)    /* new lofault */
1062         call    bzero_altentry
1063         xorl    %eax, %eax
1064         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1065         ret
1066         /*
1067          * A fault during bzero is indicated through an errno value
1068          * in %rax when we iretq to here.
1069          */
1070 _kzeroerr:
1071         addq    $8, %rsp                /* pop bzero_altentry call ret addr */
1072         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1073         ret
1074         SET_SIZE(kzero)
1075 
1076 #elif defined(__i386)
1077 
1078 #define ARG_ADDR        8
1079 #define ARG_COUNT       12
1080 
1081         ENTRY(kzero)
1082 #ifdef DEBUG
1083         pushl   %ebp
1084         movl    %esp, %ebp
1085         movl    postbootkernelbase, %eax
1086         cmpl    %eax, ARG_ADDR(%ebp)
1087         jnb     0f
1088         pushl   $.kzero_panic_msg
1089         call    panic
1090 0:      popl    %ebp
1091 #endif
1092         lea     _kzeroerr, %eax         /* kzeroerr is lofault value */
1093 
1094         pushl   %ebp                    /* save stack base */
1095         movl    %esp, %ebp              /* set new stack base */
1096         pushl   %edi                    /* save %edi */
1097 
1098         mov     %gs:CPU_THREAD, %edx
1099         movl    T_LOFAULT(%edx), %edi
1100         pushl   %edi                    /* save the current lofault */
1101         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
1102 
1103         movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1104         movl    ARG_ADDR(%ebp), %edi    /* %edi <- address of bytes to clear */
1105         shrl    $2, %ecx                /* Count of double words to zero */
1106         xorl    %eax, %eax              /* sstol val */
1107         rep
1108           sstol                 /* %ecx contains words to clear (%eax=0) */
1109 
1110         movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1111         andl    $3, %ecx                /* do mod 4 */
1112         rep
1113           sstob                 /* %ecx contains residual bytes to clear */
1114 
1115         /*
1116          * A fault during kzero is indicated through an errno value
1117          * in %eax when we iret to here.
1118          */
1119 _kzeroerr:
1120         popl    %edi
1121         movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
1122         popl    %edi
1123         popl    %ebp
1124         ret
1125         SET_SIZE(kzero)
1126 
1127 #undef  ARG_ADDR
1128 #undef  ARG_COUNT
1129 
1130 #endif  /* __i386 */
1131 #endif  /* __lint */
1132 
1133 /*
1134  * Zero a block of storage.
1135  */
1136 
1137 #if defined(__lint)
1138 
1139 /* ARGSUSED */
1140 void
1141 bzero(void *addr, size_t count)
1142 {}
1143 
1144 #else   /* __lint */
1145 
1146 #if defined(__amd64)
1147 
1148         ENTRY(bzero)
1149 #ifdef DEBUG
1150         cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1151         jnb     0f
1152         leaq    .bzero_panic_msg(%rip), %rdi
1153         jmp     call_panic              /* setup stack and call panic */
1154 0:
1155 #endif
1156         ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159         xorl    %eax, %eax
1160 
1161         cmpq    $0x50, %rsi             /* 80 */
1162         jae     L(ck_align)
1163 
	/*
	 * Performance data shows that many callers zero small buffers. So for
	 * the best performance at these sizes, unrolled code is used. Store
	 * zeros without worrying about alignment.
	 */
1169         leaq    L(setPxQx)(%rip), %r10
1170         addq    %rsi, %rdi
1171         movslq  (%r10,%rsi,4), %rcx
1172         leaq    (%rcx,%r10,1), %r10
1173         INDIRECT_JMP_REG(r10)
1174 
1175         .p2align 4
1176 L(setPxQx):
1177         .int       L(P0Q0)-L(setPxQx)   /* 0 */
1178         .int       L(P1Q0)-L(setPxQx)
1179         .int       L(P2Q0)-L(setPxQx)
1180         .int       L(P3Q0)-L(setPxQx)
1181         .int       L(P4Q0)-L(setPxQx)
1182         .int       L(P5Q0)-L(setPxQx)
1183         .int       L(P6Q0)-L(setPxQx)
1184         .int       L(P7Q0)-L(setPxQx)
1185 
1186         .int       L(P0Q1)-L(setPxQx)   /* 8 */
1187         .int       L(P1Q1)-L(setPxQx)
1188         .int       L(P2Q1)-L(setPxQx)
1189         .int       L(P3Q1)-L(setPxQx)
1190         .int       L(P4Q1)-L(setPxQx)
1191         .int       L(P5Q1)-L(setPxQx)
1192         .int       L(P6Q1)-L(setPxQx)
1193         .int       L(P7Q1)-L(setPxQx)
1194 
1195         .int       L(P0Q2)-L(setPxQx)   /* 16 */
1196         .int       L(P1Q2)-L(setPxQx)
1197         .int       L(P2Q2)-L(setPxQx)
1198         .int       L(P3Q2)-L(setPxQx)
1199         .int       L(P4Q2)-L(setPxQx)
1200         .int       L(P5Q2)-L(setPxQx)
1201         .int       L(P6Q2)-L(setPxQx)
1202         .int       L(P7Q2)-L(setPxQx)
1203 
1204         .int       L(P0Q3)-L(setPxQx)   /* 24 */
1205         .int       L(P1Q3)-L(setPxQx)
1206         .int       L(P2Q3)-L(setPxQx)
1207         .int       L(P3Q3)-L(setPxQx)
1208         .int       L(P4Q3)-L(setPxQx)
1209         .int       L(P5Q3)-L(setPxQx)
1210         .int       L(P6Q3)-L(setPxQx)
1211         .int       L(P7Q3)-L(setPxQx)
1212 
1213         .int       L(P0Q4)-L(setPxQx)   /* 32 */
1214         .int       L(P1Q4)-L(setPxQx)
1215         .int       L(P2Q4)-L(setPxQx)
1216         .int       L(P3Q4)-L(setPxQx)
1217         .int       L(P4Q4)-L(setPxQx)
1218         .int       L(P5Q4)-L(setPxQx)
1219         .int       L(P6Q4)-L(setPxQx)
1220         .int       L(P7Q4)-L(setPxQx)
1221 
1222         .int       L(P0Q5)-L(setPxQx)   /* 40 */
1223         .int       L(P1Q5)-L(setPxQx)
1224         .int       L(P2Q5)-L(setPxQx)
1225         .int       L(P3Q5)-L(setPxQx)
1226         .int       L(P4Q5)-L(setPxQx)
1227         .int       L(P5Q5)-L(setPxQx)
1228         .int       L(P6Q5)-L(setPxQx)
1229         .int       L(P7Q5)-L(setPxQx)
1230 
1231         .int       L(P0Q6)-L(setPxQx)   /* 48 */
1232         .int       L(P1Q6)-L(setPxQx)
1233         .int       L(P2Q6)-L(setPxQx)
1234         .int       L(P3Q6)-L(setPxQx)
1235         .int       L(P4Q6)-L(setPxQx)
1236         .int       L(P5Q6)-L(setPxQx)
1237         .int       L(P6Q6)-L(setPxQx)
1238         .int       L(P7Q6)-L(setPxQx)
1239 
1240         .int       L(P0Q7)-L(setPxQx)   /* 56 */
1241         .int       L(P1Q7)-L(setPxQx)
1242         .int       L(P2Q7)-L(setPxQx)
1243         .int       L(P3Q7)-L(setPxQx)
1244         .int       L(P4Q7)-L(setPxQx)
1245         .int       L(P5Q7)-L(setPxQx)
1246         .int       L(P6Q7)-L(setPxQx)
1247         .int       L(P7Q7)-L(setPxQx)
1248 
1249         .int       L(P0Q8)-L(setPxQx)   /* 64 */
1250         .int       L(P1Q8)-L(setPxQx)
1251         .int       L(P2Q8)-L(setPxQx)
1252         .int       L(P3Q8)-L(setPxQx)
1253         .int       L(P4Q8)-L(setPxQx)
1254         .int       L(P5Q8)-L(setPxQx)
1255         .int       L(P6Q8)-L(setPxQx)
1256         .int       L(P7Q8)-L(setPxQx)
1257 
1258         .int       L(P0Q9)-L(setPxQx)   /* 72 */
1259         .int       L(P1Q9)-L(setPxQx)
1260         .int       L(P2Q9)-L(setPxQx)
1261         .int       L(P3Q9)-L(setPxQx)
1262         .int       L(P4Q9)-L(setPxQx)
1263         .int       L(P5Q9)-L(setPxQx)
1264         .int       L(P6Q9)-L(setPxQx)
1265         .int       L(P7Q9)-L(setPxQx)   /* 79 */
1266 
1267         .p2align 4
1268 L(P0Q9): mov    %rax, -0x48(%rdi)
1269 L(P0Q8): mov    %rax, -0x40(%rdi)
1270 L(P0Q7): mov    %rax, -0x38(%rdi)
1271 L(P0Q6): mov    %rax, -0x30(%rdi)
1272 L(P0Q5): mov    %rax, -0x28(%rdi)
1273 L(P0Q4): mov    %rax, -0x20(%rdi)
1274 L(P0Q3): mov    %rax, -0x18(%rdi)
1275 L(P0Q2): mov    %rax, -0x10(%rdi)
1276 L(P0Q1): mov    %rax, -0x8(%rdi)
1277 L(P0Q0):
1278          ret
1279 
1280         .p2align 4
1281 L(P1Q9): mov    %rax, -0x49(%rdi)
1282 L(P1Q8): mov    %rax, -0x41(%rdi)
1283 L(P1Q7): mov    %rax, -0x39(%rdi)
1284 L(P1Q6): mov    %rax, -0x31(%rdi)
1285 L(P1Q5): mov    %rax, -0x29(%rdi)
1286 L(P1Q4): mov    %rax, -0x21(%rdi)
1287 L(P1Q3): mov    %rax, -0x19(%rdi)
1288 L(P1Q2): mov    %rax, -0x11(%rdi)
1289 L(P1Q1): mov    %rax, -0x9(%rdi)
1290 L(P1Q0): mov    %al, -0x1(%rdi)
1291          ret
1292 
1293         .p2align 4
1294 L(P2Q9): mov    %rax, -0x4a(%rdi)
1295 L(P2Q8): mov    %rax, -0x42(%rdi)
1296 L(P2Q7): mov    %rax, -0x3a(%rdi)
1297 L(P2Q6): mov    %rax, -0x32(%rdi)
1298 L(P2Q5): mov    %rax, -0x2a(%rdi)
1299 L(P2Q4): mov    %rax, -0x22(%rdi)
1300 L(P2Q3): mov    %rax, -0x1a(%rdi)
1301 L(P2Q2): mov    %rax, -0x12(%rdi)
1302 L(P2Q1): mov    %rax, -0xa(%rdi)
1303 L(P2Q0): mov    %ax, -0x2(%rdi)
1304          ret
1305 
1306         .p2align 4
1307 L(P3Q9): mov    %rax, -0x4b(%rdi)
1308 L(P3Q8): mov    %rax, -0x43(%rdi)
1309 L(P3Q7): mov    %rax, -0x3b(%rdi)
1310 L(P3Q6): mov    %rax, -0x33(%rdi)
1311 L(P3Q5): mov    %rax, -0x2b(%rdi)
1312 L(P3Q4): mov    %rax, -0x23(%rdi)
1313 L(P3Q3): mov    %rax, -0x1b(%rdi)
1314 L(P3Q2): mov    %rax, -0x13(%rdi)
1315 L(P3Q1): mov    %rax, -0xb(%rdi)
1316 L(P3Q0): mov    %ax, -0x3(%rdi)
1317          mov    %al, -0x1(%rdi)
1318          ret
1319 
1320         .p2align 4
1321 L(P4Q9): mov    %rax, -0x4c(%rdi)
1322 L(P4Q8): mov    %rax, -0x44(%rdi)
1323 L(P4Q7): mov    %rax, -0x3c(%rdi)
1324 L(P4Q6): mov    %rax, -0x34(%rdi)
1325 L(P4Q5): mov    %rax, -0x2c(%rdi)
1326 L(P4Q4): mov    %rax, -0x24(%rdi)
1327 L(P4Q3): mov    %rax, -0x1c(%rdi)
1328 L(P4Q2): mov    %rax, -0x14(%rdi)
1329 L(P4Q1): mov    %rax, -0xc(%rdi)
1330 L(P4Q0): mov    %eax, -0x4(%rdi)
1331          ret
1332 
1333         .p2align 4
1334 L(P5Q9): mov    %rax, -0x4d(%rdi)
1335 L(P5Q8): mov    %rax, -0x45(%rdi)
1336 L(P5Q7): mov    %rax, -0x3d(%rdi)
1337 L(P5Q6): mov    %rax, -0x35(%rdi)
1338 L(P5Q5): mov    %rax, -0x2d(%rdi)
1339 L(P5Q4): mov    %rax, -0x25(%rdi)
1340 L(P5Q3): mov    %rax, -0x1d(%rdi)
1341 L(P5Q2): mov    %rax, -0x15(%rdi)
1342 L(P5Q1): mov    %rax, -0xd(%rdi)
1343 L(P5Q0): mov    %eax, -0x5(%rdi)
1344          mov    %al, -0x1(%rdi)
1345          ret
1346 
1347         .p2align 4
1348 L(P6Q9): mov    %rax, -0x4e(%rdi)
1349 L(P6Q8): mov    %rax, -0x46(%rdi)
1350 L(P6Q7): mov    %rax, -0x3e(%rdi)
1351 L(P6Q6): mov    %rax, -0x36(%rdi)
1352 L(P6Q5): mov    %rax, -0x2e(%rdi)
1353 L(P6Q4): mov    %rax, -0x26(%rdi)
1354 L(P6Q3): mov    %rax, -0x1e(%rdi)
1355 L(P6Q2): mov    %rax, -0x16(%rdi)
1356 L(P6Q1): mov    %rax, -0xe(%rdi)
1357 L(P6Q0): mov    %eax, -0x6(%rdi)
1358          mov    %ax, -0x2(%rdi)
1359          ret
1360 
1361         .p2align 4
1362 L(P7Q9): mov    %rax, -0x4f(%rdi)
1363 L(P7Q8): mov    %rax, -0x47(%rdi)
1364 L(P7Q7): mov    %rax, -0x3f(%rdi)
1365 L(P7Q6): mov    %rax, -0x37(%rdi)
1366 L(P7Q5): mov    %rax, -0x2f(%rdi)
1367 L(P7Q4): mov    %rax, -0x27(%rdi)
1368 L(P7Q3): mov    %rax, -0x1f(%rdi)
1369 L(P7Q2): mov    %rax, -0x17(%rdi)
1370 L(P7Q1): mov    %rax, -0xf(%rdi)
1371 L(P7Q0): mov    %eax, -0x7(%rdi)
1372          mov    %ax, -0x3(%rdi)
1373          mov    %al, -0x1(%rdi)
1374          ret
1375 
	/*
	 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
	 * as well as from stores spanning cachelines. Note 16-byte alignment
	 * is better in the case where rep sstoq is used.
	 */
1381         .p2align 4
1382 L(ck_align):
1383         test    $0xf, %rdi
1384         jz      L(aligned_now)
1385         test    $1, %rdi
1386         jz      2f
1387         mov     %al, (%rdi)
1388         dec     %rsi
1389         lea     1(%rdi),%rdi
1390 2:
1391         test    $2, %rdi
1392         jz      4f
1393         mov     %ax, (%rdi)
1394         sub     $2, %rsi
1395         lea     2(%rdi),%rdi
1396 4:
1397         test    $4, %rdi
1398         jz      8f
1399         mov     %eax, (%rdi)
1400         sub     $4, %rsi
1401         lea     4(%rdi),%rdi
1402 8:
1403         test    $8, %rdi
1404         jz      L(aligned_now)
1405         mov     %rax, (%rdi)
1406         sub     $8, %rsi
1407         lea     8(%rdi),%rdi
1408 
1409         /*
1410          * For large sizes rep sstoq is fastest.
1411          * Transition point determined experimentally as measured on
1412          * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1413          */
1414 L(aligned_now):
1415         cmp     $BZERO_USE_REP, %rsi
1416         ja      L(use_rep)
1417 
	/*
	 * Zero 64 bytes per loop iteration
	 */
1421         .p2align 4
1422 L(bzero_loop):
1423         leaq    -0x40(%rsi), %rsi
1424         cmpq    $0x40, %rsi
1425         movq    %rax, (%rdi)
1426         movq    %rax, 0x8(%rdi)
1427         movq    %rax, 0x10(%rdi)
1428         movq    %rax, 0x18(%rdi)
1429         movq    %rax, 0x20(%rdi)
1430         movq    %rax, 0x28(%rdi)
1431         movq    %rax, 0x30(%rdi)
1432         movq    %rax, 0x38(%rdi)
1433         leaq    0x40(%rdi), %rdi
1434         jae     L(bzero_loop)
1435 
	/*
	 * Clear any remaining bytes.
	 */
1439 9:
1440         leaq    L(setPxQx)(%rip), %r10
1441         addq    %rsi, %rdi
1442         movslq  (%r10,%rsi,4), %rcx
1443         leaq    (%rcx,%r10,1), %r10
1444         INDIRECT_JMP_REG(r10)
1445 
1446         /*
1447          * Use rep sstoq. Clear any remainder via unrolled code
1448          */
1449         .p2align 4
1450 L(use_rep):
1451         movq    %rsi, %rcx              /* get size in bytes */
1452         shrq    $3, %rcx                /* count of 8-byte words to zero */
1453         rep
1454           sstoq                         /* %rcx = words to clear (%rax=0) */
1455         andq    $7, %rsi                /* remaining bytes */
1456         jnz     9b
1457         ret
1458 #undef  L
1459         SET_SIZE(bzero_altentry)
1460         SET_SIZE(bzero)
1461 
1462 #elif defined(__i386)
1463 
1464 #define ARG_ADDR        4
1465 #define ARG_COUNT       8
1466 
1467         ENTRY(bzero)
1468 #ifdef DEBUG
1469         movl    postbootkernelbase, %eax
1470         cmpl    %eax, ARG_ADDR(%esp)
1471         jnb     0f
1472         pushl   %ebp
1473         movl    %esp, %ebp
1474         pushl   $.bzero_panic_msg
1475         call    panic
1476 0:
1477 #endif
1478 do_zero:
1479         movl    %edi, %edx
1480         movl    ARG_COUNT(%esp), %ecx
1481         movl    ARG_ADDR(%esp), %edi
1482         shrl    $2, %ecx
1483         xorl    %eax, %eax
1484         rep
1485           sstol
1486         movl    ARG_COUNT(%esp), %ecx
1487         andl    $3, %ecx
1488         rep
1489           sstob
1490         movl    %edx, %edi
1491         ret
1492         SET_SIZE(bzero)
1493 
1494 #undef  ARG_ADDR
1495 #undef  ARG_COUNT
1496 
1497 #endif  /* __i386 */
1498 #endif  /* __lint */
1499 
/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin_nta() and
 * xcopyout_nta() which return the errno that we've faithfully computed.
 * This allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 */
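
/*
 * The practical difference for C callers, as a brief sketch (illustrative
 * only; both routines are available to kernel code):
 *
 *	if (copyin(uaddr, kaddr, len) != 0)
 *		return (EFAULT);	DDI/DKI contract: -1 means failure
 *
 *	error = xcopyin_nta(uaddr, kaddr, len, 0);	0 permits the
 *							non-temporal path
 *	if (error != 0)
 *		return (error);		the errno computed by the handler
 */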
1517 
1518 /*
1519  * Copy user data to kernel space.
1520  */
1521 
1522 #if defined(__lint)
1523 
1524 /* ARGSUSED */
1525 int
1526 copyin(const void *uaddr, void *kaddr, size_t count)
1527 { return (0); }
1528 
1529 #else   /* lint */
1530 
1531 #if defined(__amd64)
1532 
1533         ENTRY(copyin)
1534         pushq   %rbp
1535         movq    %rsp, %rbp
1536         subq    $24, %rsp
1537 
1538         /*
1539          * save args in case we trap and need to rerun as a copyop
1540          */
1541         movq    %rdi, (%rsp)
1542         movq    %rsi, 0x8(%rsp)
1543         movq    %rdx, 0x10(%rsp)
1544 
1545         movq    kernelbase(%rip), %rax
1546 #ifdef DEBUG
1547         cmpq    %rax, %rsi              /* %rsi = kaddr */
1548         jnb     1f
1549         leaq    .copyin_panic_msg(%rip), %rdi
1550         xorl    %eax, %eax
1551         call    panic
1552 1:
1553 #endif
1554         /*
1555          * pass lofault value as 4th argument to do_copy_fault
1556          */
1557         leaq    _copyin_err(%rip), %rcx
1558 
1559         movq    %gs:CPU_THREAD, %r9
1560         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
	jae	3f			/* take copyop if uaddr >= kernelbase */
1562         SMAP_DISABLE_INSTR(0)
1563         jmp     do_copy_fault           /* Takes care of leave for us */
1564 
1565 _copyin_err:
1566         SMAP_ENABLE_INSTR(2)
1567         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1568         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1569 3:
1570         movq    T_COPYOPS(%r9), %rax
1571         cmpq    $0, %rax
1572         jz      2f
1573         /*
1574          * reload args for the copyop
1575          */
1576         movq    (%rsp), %rdi
1577         movq    0x8(%rsp), %rsi
1578         movq    0x10(%rsp), %rdx
1579         leave
1580         movq    CP_COPYIN(%rax), %rax
1581         INDIRECT_JMP_REG(rax)
1582 
1583 2:      movl    $-1, %eax
1584         leave
1585         ret
1586         SET_SIZE(copyin)
1587 
1588 #elif defined(__i386)
1589 
1590 #define ARG_UADDR       4
1591 #define ARG_KADDR       8
1592 
1593         ENTRY(copyin)
1594         movl    kernelbase, %ecx
1595 #ifdef DEBUG
1596         cmpl    %ecx, ARG_KADDR(%esp)
1597         jnb     1f
1598         pushl   %ebp
1599         movl    %esp, %ebp
1600         pushl   $.copyin_panic_msg
1601         call    panic
1602 1:
1603 #endif
1604         lea     _copyin_err, %eax
1605 
1606         movl    %gs:CPU_THREAD, %edx
1607         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1608         jb      do_copy_fault
1609         jmp     3f
1610 
1611 _copyin_err:
1612         popl    %ecx
1613         popl    %edi
1614         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1615         popl    %esi
1616         popl    %ebp
1617 3:
1618         movl    T_COPYOPS(%edx), %eax
1619         cmpl    $0, %eax
1620         jz      2f
1621         jmp     *CP_COPYIN(%eax)
1622 
1623 2:      movl    $-1, %eax
1624         ret
1625         SET_SIZE(copyin)
1626 
1627 #undef  ARG_UADDR
1628 #undef  ARG_KADDR
1629 
1630 #endif  /* __i386 */
1631 #endif  /* __lint */
1632 
1633 #if defined(__lint)
1634 
1635 /* ARGSUSED */
1636 int
1637 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1638 { return (0); }
1639 
1640 #else   /* __lint */
1641 
1642 #if defined(__amd64)
1643 
1644         ENTRY(xcopyin_nta)
1645         pushq   %rbp
1646         movq    %rsp, %rbp
1647         subq    $24, %rsp
1648 
1649         /*
1650          * save args in case we trap and need to rerun as a copyop
1651          * %rcx is consumed in this routine so we don't need to save
1652          * it.
1653          */
1654         movq    %rdi, (%rsp)
1655         movq    %rsi, 0x8(%rsp)
1656         movq    %rdx, 0x10(%rsp)
1657 
1658         movq    kernelbase(%rip), %rax
1659 #ifdef DEBUG
1660         cmpq    %rax, %rsi              /* %rsi = kaddr */
1661         jnb     1f
1662         leaq    .xcopyin_panic_msg(%rip), %rdi
1663         xorl    %eax, %eax
1664         call    panic
1665 1:
1666 #endif
1667         movq    %gs:CPU_THREAD, %r9
1668         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1669         jae     4f
1670         cmpq    $0, %rcx                /* No non-temporal access? */
1671         /*
1672          * pass lofault value as 4th argument to do_copy_fault
1673          */
1674         leaq    _xcopyin_err(%rip), %rcx        /* doesn't set rflags */
1675         jnz     6f                      /* use regular access */
1676         /*
1677          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1678          */
1679         cmpq    $XCOPY_MIN_SIZE, %rdx
1680         jae     5f
1681 6:
1682         SMAP_DISABLE_INSTR(1)
1683         jmp     do_copy_fault
1684 
1685         /*
1686          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1687          * count is COUNT_ALIGN_SIZE aligned.
1688          */
1689 5:
1690         movq    %rdi, %r10
1691         orq     %rsi, %r10
1692         andq    $NTA_ALIGN_MASK, %r10
1693         orq     %rdx, %r10
1694         andq    $COUNT_ALIGN_MASK, %r10
1695         jnz     6b
1696         leaq    _xcopyin_nta_err(%rip), %rcx    /* doesn't set rflags */
1697         SMAP_DISABLE_INSTR(2)
1698         jmp     do_copy_fault_nta       /* use non-temporal access */
1699 
1700 4:
1701         movl    $EFAULT, %eax
1702         jmp     3f
1703 
1704         /*
1705          * A fault during do_copy_fault or do_copy_fault_nta is
1706          * indicated through an errno value in %rax and we iret from the
1707          * trap handler to here.
1708          */
1709 _xcopyin_err:
1710         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1711 _xcopyin_nta_err:
1712         SMAP_ENABLE_INSTR(3)
1713         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1714 3:
1715         movq    T_COPYOPS(%r9), %r8
1716         cmpq    $0, %r8
1717         jz      2f
1718 
1719         /*
1720          * reload args for the copyop
1721          */
1722         movq    (%rsp), %rdi
1723         movq    0x8(%rsp), %rsi
1724         movq    0x10(%rsp), %rdx
1725         leave
1726         movq    CP_XCOPYIN(%r8), %r8
1727         INDIRECT_JMP_REG(r8)
1728 
1729 2:      leave
1730         ret
1731         SET_SIZE(xcopyin_nta)
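
     /*
      * For reference, a C sketch of the non-temporal eligibility test
      * performed above before do_copy_fault_nta is used (the helper name
      * is hypothetical; the sizes are the NTA/COUNT alignment constants
      * defined at the top of this file):
      *
      *      static int
      *      xcopy_wants_nta(uintptr_t uaddr, uintptr_t kaddr, size_t count,
      *          int copy_cached)
      *      {
      *              if (copy_cached != 0 || count < XCOPY_MIN_SIZE)
      *                      return (0);
      *              if (((uaddr | kaddr) & (NTA_ALIGN_SIZE - 1)) != 0)
      *                      return (0);
      *              if ((count & (COUNT_ALIGN_SIZE - 1)) != 0)
      *                      return (0);
      *              return (1);
      *      }
      */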
1732 
1733 #elif defined(__i386)
1734 
1735 #define ARG_UADDR       4
1736 #define ARG_KADDR       8
1737 #define ARG_COUNT       12
1738 #define ARG_CACHED      16
1739 
1740         .globl  use_sse_copy
1741 
1742         ENTRY(xcopyin_nta)
1743         movl    kernelbase, %ecx
1744         lea     _xcopyin_err, %eax
1745         movl    %gs:CPU_THREAD, %edx
1746         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1747         jae     4f
1748 
1749         cmpl    $0, use_sse_copy        /* no sse support */
1750         jz      do_copy_fault
1751 
1752         cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
1753         jnz     do_copy_fault
1754 
1755         /*
1756          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1757          */
1758         cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1759         jb      do_copy_fault
1760 
1761         /*
1762          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1763          * count is COUNT_ALIGN_SIZE aligned.
1764          */
1765         movl    ARG_UADDR(%esp), %ecx
1766         orl     ARG_KADDR(%esp), %ecx
1767         andl    $NTA_ALIGN_MASK, %ecx
1768         orl     ARG_COUNT(%esp), %ecx
1769         andl    $COUNT_ALIGN_MASK, %ecx
1770         jnz     do_copy_fault
1771 
1772         jmp     do_copy_fault_nta       /* use non-temporal access */
1773 
1774 4:
1775         movl    $EFAULT, %eax
1776         jmp     3f
1777 
1778         /*
1779          * A fault during do_copy_fault or do_copy_fault_nta is
1780          * indicated through an errno value in %eax and we iret from the
1781          * trap handler to here.
1782          */
1783 _xcopyin_err:
1784         popl    %ecx
1785         popl    %edi
1786         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1787         popl    %esi
1788         popl    %ebp
1789 3:
1790         cmpl    $0, T_COPYOPS(%edx)
1791         jz      2f
1792         movl    T_COPYOPS(%edx), %eax
1793         jmp     *CP_XCOPYIN(%eax)
1794 
1795 2:      rep;    ret     /* use 2 byte return instruction when branch target */
1796                         /* AMD Software Optimization Guide - Section 6.2 */
1797         SET_SIZE(xcopyin_nta)
1798 
1799 #undef  ARG_UADDR
1800 #undef  ARG_KADDR
1801 #undef  ARG_COUNT
1802 #undef  ARG_CACHED
1803 
1804 #endif  /* __i386 */
1805 #endif  /* __lint */
1806 
1807 /*
1808  * Copy kernel data to user space.
1809  */
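
     /*
      * A minimal usage sketch (the ioctl-style caller, "arg" and
      * struct foo_info are hypothetical): copyout() returns 0 on success
      * and a nonzero value if the destination is not a writable user
      * address, in which case callers conventionally return EFAULT.
      *
      *      struct foo_info info;
      *
      *      bzero(&info, sizeof (info));
      *      info.fi_version = 1;
      *      if (copyout(&info, (void *)arg, sizeof (info)) != 0)
      *              return (EFAULT);
      *      return (0);
      */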
1810 
1811 #if defined(__lint)
1812 
1813 /* ARGSUSED */
1814 int
1815 copyout(const void *kaddr, void *uaddr, size_t count)
1816 { return (0); }
1817 
1818 #else   /* __lint */
1819 
1820 #if defined(__amd64)
1821 
1822         ENTRY(copyout)
1823         pushq   %rbp
1824         movq    %rsp, %rbp
1825         subq    $24, %rsp
1826 
1827         /*
1828          * save args in case we trap and need to rerun as a copyop
1829          */
1830         movq    %rdi, (%rsp)
1831         movq    %rsi, 0x8(%rsp)
1832         movq    %rdx, 0x10(%rsp)
1833 
1834         movq    kernelbase(%rip), %rax
1835 #ifdef DEBUG
1836         cmpq    %rax, %rdi              /* %rdi = kaddr */
1837         jnb     1f
1838         leaq    .copyout_panic_msg(%rip), %rdi
1839         xorl    %eax, %eax
1840         call    panic
1841 1:
1842 #endif
1843         /*
1844          * pass lofault value as 4th argument to do_copy_fault
1845          */
1846         leaq    _copyout_err(%rip), %rcx
1847 
1848         movq    %gs:CPU_THREAD, %r9
1849         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1850         jae     3f                      /* take copyop if uaddr >= kernelbase */
1851         SMAP_DISABLE_INSTR(3)
1852         jmp     do_copy_fault           /* Calls leave for us */
1853 
1854 _copyout_err:
1855         SMAP_ENABLE_INSTR(4)
1856         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1857         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1858 3:
1859         movq    T_COPYOPS(%r9), %rax
1860         cmpq    $0, %rax
1861         jz      2f
1862 
1863         /*
1864          * reload args for the copyop
1865          */
1866         movq    (%rsp), %rdi
1867         movq    0x8(%rsp), %rsi
1868         movq    0x10(%rsp), %rdx
1869         leave
1870         movq    CP_COPYOUT(%rax), %rax
1871         INDIRECT_JMP_REG(rax)
1872 
1873 2:      movl    $-1, %eax
1874         leave
1875         ret
1876         SET_SIZE(copyout)
1877 
1878 #elif defined(__i386)
1879 
1880 #define ARG_KADDR       4
1881 #define ARG_UADDR       8
1882 
1883         ENTRY(copyout)
1884         movl    kernelbase, %ecx
1885 #ifdef DEBUG
1886         cmpl    %ecx, ARG_KADDR(%esp)
1887         jnb     1f
1888         pushl   %ebp
1889         movl    %esp, %ebp
1890         pushl   $.copyout_panic_msg
1891         call    panic
1892 1:
1893 #endif
1894         lea     _copyout_err, %eax
1895         movl    %gs:CPU_THREAD, %edx
1896         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1897         jb      do_copy_fault
1898         jmp     3f
1899 
1900 _copyout_err:
1901         popl    %ecx
1902         popl    %edi
1903         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1904         popl    %esi
1905         popl    %ebp
1906 3:
1907         movl    T_COPYOPS(%edx), %eax
1908         cmpl    $0, %eax
1909         jz      2f
1910         jmp     *CP_COPYOUT(%eax)
1911 
1912 2:      movl    $-1, %eax
1913         ret
1914         SET_SIZE(copyout)
1915 
1916 #undef  ARG_UADDR
1917 #undef  ARG_KADDR
1918 
1919 #endif  /* __i386 */
1920 #endif  /* __lint */
1921 
1922 #if defined(__lint)
1923 
1924 /* ARGSUSED */
1925 int
1926 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1927 { return (0); }
1928 
1929 #else   /* __lint */
1930 
1931 #if defined(__amd64)
1932 
1933         ENTRY(xcopyout_nta)
1934         pushq   %rbp
1935         movq    %rsp, %rbp
1936         subq    $24, %rsp
1937 
1938         /*
1939          * save args in case we trap and need to rerun as a copyop
1940          */
1941         movq    %rdi, (%rsp)
1942         movq    %rsi, 0x8(%rsp)
1943         movq    %rdx, 0x10(%rsp)
1944 
1945         movq    kernelbase(%rip), %rax
1946 #ifdef DEBUG
1947         cmpq    %rax, %rdi              /* %rdi = kaddr */
1948         jnb     1f
1949         leaq    .xcopyout_panic_msg(%rip), %rdi
1950         xorl    %eax, %eax
1951         call    panic
1952 1:
1953 #endif
1954         movq    %gs:CPU_THREAD, %r9
1955         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1956         jae     4f
1957 
1958         cmpq    $0, %rcx                /* No non-temporal access? */
1959         /*
1960          * pass lofault value as 4th argument to do_copy_fault
1961          */
1962         leaq    _xcopyout_err(%rip), %rcx
1963         jnz     6f
1964         /*
1965          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1966          */
1967         cmpq    $XCOPY_MIN_SIZE, %rdx
1968         jae     5f
1969 6:
1970         SMAP_DISABLE_INSTR(4)
1971         jmp     do_copy_fault
1972 
1973         /*
1974          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1975          * count is COUNT_ALIGN_SIZE aligned.
1976          */
1977 5:
1978         movq    %rdi, %r10
1979         orq     %rsi, %r10
1980         andq    $NTA_ALIGN_MASK, %r10
1981         orq     %rdx, %r10
1982         andq    $COUNT_ALIGN_MASK, %r10
1983         jnz     6b
1984         leaq    _xcopyout_nta_err(%rip), %rcx
1985         SMAP_DISABLE_INSTR(5)
1986         call    do_copy_fault_nta
1987         SMAP_ENABLE_INSTR(5)
1988         ret
1989 
1990 4:
1991         movl    $EFAULT, %eax
1992         jmp     3f
1993 
1994         /*
1995          * A fault during do_copy_fault or do_copy_fault_nta is
1996          * indicated through an errno value in %rax and we iret from the
1997          * trap handler to here.
1998          */
1999 _xcopyout_err:
2000         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
2001 _xcopyout_nta_err:
2002         SMAP_ENABLE_INSTR(6)
2003         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2004 3:
2005         movq    T_COPYOPS(%r9), %r8
2006         cmpq    $0, %r8
2007         jz      2f
2008 
2009         /*
2010          * reload args for the copyop
2011          */
2012         movq    (%rsp), %rdi
2013         movq    0x8(%rsp), %rsi
2014         movq    0x10(%rsp), %rdx
2015         leave
2016         movq    CP_XCOPYOUT(%r8), %r8
2017         INDIRECT_JMP_REG(r8)
2018 
2019 2:      leave
2020         ret
2021         SET_SIZE(xcopyout_nta)
2022 
2023 #elif defined(__i386)
2024 
2025 #define ARG_KADDR       4
2026 #define ARG_UADDR       8
2027 #define ARG_COUNT       12
2028 #define ARG_CACHED      16
2029 
2030         ENTRY(xcopyout_nta)
2031         movl    kernelbase, %ecx
2032         lea     _xcopyout_err, %eax
2033         movl    %gs:CPU_THREAD, %edx
2034         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2035         jae     4f
2036 
2037         cmpl    $0, use_sse_copy        /* no sse support */
2038         jz      do_copy_fault
2039 
2040         cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
2041         jnz     do_copy_fault
2042 
2043         /*
2044          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2045          */
2046         cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
2047         jb      do_copy_fault
2048 
2049         /*
2050          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2051          * count is COUNT_ALIGN_SIZE aligned.
2052          */
2053         movl    ARG_UADDR(%esp), %ecx
2054         orl     ARG_KADDR(%esp), %ecx
2055         andl    $NTA_ALIGN_MASK, %ecx
2056         orl     ARG_COUNT(%esp), %ecx
2057         andl    $COUNT_ALIGN_MASK, %ecx
2058         jnz     do_copy_fault
2059         jmp     do_copy_fault_nta
2060 
2061 4:
2062         movl    $EFAULT, %eax
2063         jmp     3f
2064 
2065         /*
2066          * A fault during do_copy_fault or do_copy_fault_nta is
2067          * indicated through an errno value in %eax and we iret from the
2068          * trap handler to here.
2069          */
2070 _xcopyout_err:
2071         /* restore the original lofault */
2072         popl    %ecx
2073         popl    %edi
2074         movl    %ecx, T_LOFAULT(%edx)   /* original lofault */
2075         popl    %esi
2076         popl    %ebp
2077 3:
2078         cmpl    $0, T_COPYOPS(%edx)
2079         jz      2f
2080         movl    T_COPYOPS(%edx), %eax
2081         jmp     *CP_XCOPYOUT(%eax)
2082 
2083 2:      rep;    ret     /* use 2 byte return instruction when branch target */
2084                         /* AMD Software Optimization Guide - Section 6.2 */
2085         SET_SIZE(xcopyout_nta)
2086 
2087 #undef  ARG_UADDR
2088 #undef  ARG_KADDR
2089 #undef  ARG_COUNT
2090 #undef  ARG_CACHED
2091 
2092 #endif  /* __i386 */
2093 #endif  /* __lint */
2094 
2095 /*
2096  * Copy a null terminated string from one point to another in
2097  * the kernel address space.
2098  */
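
     /*
      * For reference, a C sketch of the loop implemented by do_copystr
      * below, using the argument names from the prototype that follows
      * (illustration only; the same assembly is shared by copyinstr and
      * copyoutstr, which install a different lofault handler first):
      *
      *      size_t left = maxlength;
      *      int err = ENAMETOOLONG;
      *
      *      while (left != 0) {
      *              left--;
      *              if ((*to++ = *from++) == '\0') {
      *                      err = 0;
      *                      break;
      *              }
      *      }
      *      if (lencopied != NULL)
      *              *lencopied = maxlength - left;
      *      return (err);
      */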
2099 
2100 #if defined(__lint)
2101 
2102 /* ARGSUSED */
2103 int
2104 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2105 { return (0); }
2106 
2107 #else   /* __lint */
2108 
2109 #if defined(__amd64)
2110 
2111         ENTRY(copystr)
2112         pushq   %rbp
2113         movq    %rsp, %rbp
2114 #ifdef DEBUG
2115         movq    kernelbase(%rip), %rax
2116         cmpq    %rax, %rdi              /* %rdi = from */
2117         jb      0f
2118         cmpq    %rax, %rsi              /* %rsi = to */
2119         jnb     1f
2120 0:      leaq    .copystr_panic_msg(%rip), %rdi
2121         xorl    %eax, %eax
2122         call    panic
2123 1:
2124 #endif
2125         movq    %gs:CPU_THREAD, %r9
2126         movq    T_LOFAULT(%r9), %r8     /* pass current lofault value as */
2127                                         /* 5th argument to do_copystr */
2128         xorl    %r10d,%r10d             /* pass smap restore need in %r10d */
2129                                         /* as a non-ABI 6th arg */
2130 do_copystr:
2131         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
2132         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
2133         movq    %r8, T_LOFAULT(%r9)     /* new lofault */
2134 
2135         movq    %rdx, %r8               /* save maxlength */
2136 
2137         cmpq    $0, %rdx                /* %rdx = maxlength */
2138         je      copystr_enametoolong    /* maxlength == 0 */
2139 
2140 copystr_loop:
2141         decq    %r8
2142         movb    (%rdi), %al
2143         incq    %rdi
2144         movb    %al, (%rsi)
2145         incq    %rsi
2146         cmpb    $0, %al
2147         je      copystr_null            /* null char */
2148         cmpq    $0, %r8
2149         jne     copystr_loop
2150 
2151 copystr_enametoolong:
2152         movl    $ENAMETOOLONG, %eax
2153         jmp     copystr_out
2154 
2155 copystr_null:
2156         xorl    %eax, %eax              /* no error */
2157 
2158 copystr_out:
2159         cmpq    $0, %rcx                /* want length? */
2160         je      copystr_smap            /* no */
2161         subq    %r8, %rdx               /* compute length and store it */
2162         movq    %rdx, (%rcx)
2163 
2164 copystr_smap:
2165         cmpl    $0, %r10d
2166         jz      copystr_done
2167         SMAP_ENABLE_INSTR(7)
2168 
2169 copystr_done:
2170         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2171         leave
2172         ret
2173         SET_SIZE(copystr)
2174 
2175 #elif defined(__i386)
2176 
2177 #define ARG_FROM        8
2178 #define ARG_TO          12
2179 #define ARG_MAXLEN      16
2180 #define ARG_LENCOPIED   20
2181 
2182         ENTRY(copystr)
2183 #ifdef DEBUG
2184         pushl   %ebp
2185         movl    %esp, %ebp
2186         movl    kernelbase, %eax
2187         cmpl    %eax, ARG_FROM(%esp)
2188         jb      0f
2189         cmpl    %eax, ARG_TO(%esp)
2190         jnb     1f
2191 0:      pushl   $.copystr_panic_msg
2192         call    panic
2193 1:      popl    %ebp
2194 #endif
2195         /* get the current lofault address */
2196         movl    %gs:CPU_THREAD, %eax
2197         movl    T_LOFAULT(%eax), %eax
2198 do_copystr:
2199         pushl   %ebp                    /* setup stack frame */
2200         movl    %esp, %ebp
2201         pushl   %ebx                    /* save registers */
2202         pushl   %edi
2203 
2204         movl    %gs:CPU_THREAD, %ebx
2205         movl    T_LOFAULT(%ebx), %edi
2206         pushl   %edi                    /* save the current lofault */
2207         movl    %eax, T_LOFAULT(%ebx)   /* new lofault */
2208 
2209         movl    ARG_MAXLEN(%ebp), %ecx
2210         cmpl    $0, %ecx
2211         je      copystr_enametoolong    /* maxlength == 0 */
2212 
2213         movl    ARG_FROM(%ebp), %ebx    /* source address */
2214         movl    ARG_TO(%ebp), %edx      /* destination address */
2215 
2216 copystr_loop:
2217         decl    %ecx
2218         movb    (%ebx), %al
2219         incl    %ebx
2220         movb    %al, (%edx)
2221         incl    %edx
2222         cmpb    $0, %al
2223         je      copystr_null            /* null char */
2224         cmpl    $0, %ecx
2225         jne     copystr_loop
2226 
2227 copystr_enametoolong:
2228         movl    $ENAMETOOLONG, %eax
2229         jmp     copystr_out
2230 
2231 copystr_null:
2232         xorl    %eax, %eax              /* no error */
2233 
2234 copystr_out:
2235         cmpl    $0, ARG_LENCOPIED(%ebp) /* want length? */
2236         je      copystr_done            /* no */
2237         movl    ARG_MAXLEN(%ebp), %edx
2238         subl    %ecx, %edx              /* compute length and store it */
2239         movl    ARG_LENCOPIED(%ebp), %ecx
2240         movl    %edx, (%ecx)
2241 
2242 copystr_done:
2243         popl    %edi
2244         movl    %gs:CPU_THREAD, %ebx
2245         movl    %edi, T_LOFAULT(%ebx)   /* restore the original lofault */
2246 
2247         popl    %edi
2248         popl    %ebx
2249         popl    %ebp
2250         ret
2251         SET_SIZE(copystr)
2252 
2253 #undef  ARG_FROM
2254 #undef  ARG_TO
2255 #undef  ARG_MAXLEN
2256 #undef  ARG_LENCOPIED
2257 
2258 #endif  /* __i386 */
2259 #endif  /* __lint */
2260 
2261 /*
2262  * Copy a null terminated string from the user address space into
2263  * the kernel address space.
2264  */
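
     /*
      * A minimal usage sketch (the user pointer "ustr" and the surrounding
      * handler are hypothetical): copyinstr() returns 0 on success, EFAULT
      * if the source is not a readable user address, and ENAMETOOLONG if no
      * terminating NUL is found within maxlength bytes.
      *
      *      char path[MAXPATHLEN];
      *      size_t len;
      *      int err;
      *
      *      if ((err = copyinstr(ustr, path, sizeof (path), &len)) != 0)
      *              return (err);
      */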
2265 
2266 #if defined(__lint)
2267 
2268 /* ARGSUSED */
2269 int
2270 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2271     size_t *lencopied)
2272 { return (0); }
2273 
2274 #else   /* __lint */
2275 
2276 #if defined(__amd64)
2277 
2278         ENTRY(copyinstr)
2279         pushq   %rbp
2280         movq    %rsp, %rbp
2281         subq    $32, %rsp
2282 
2283         /*
2284          * save args in case we trap and need to rerun as a copyop
2285          */
2286         movq    %rdi, (%rsp)
2287         movq    %rsi, 0x8(%rsp)
2288         movq    %rdx, 0x10(%rsp)
2289         movq    %rcx, 0x18(%rsp)
2290 
2291         movq    kernelbase(%rip), %rax
2292 #ifdef DEBUG
2293         cmpq    %rax, %rsi              /* %rsi = kaddr */
2294         jnb     1f
2295         leaq    .copyinstr_panic_msg(%rip), %rdi
2296         xorl    %eax, %eax
2297         call    panic
2298 1:
2299 #endif
2300         /*
2301          * pass lofault value as 5th argument to do_copystr
2302          * do_copystr expects whether or not we need smap in %r10d
2303          */
2304         leaq    _copyinstr_error(%rip), %r8
2305         movl    $1, %r10d
2306 
2307         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
2308         jae     4f
2309         SMAP_DISABLE_INSTR(6)
2310         jmp     do_copystr
2311 4:
2312         movq    %gs:CPU_THREAD, %r9
2313         jmp     3f
2314 
2315 _copyinstr_error:
2316         SMAP_ENABLE_INSTR(8)
2317         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2318 3:
2319         movq    T_COPYOPS(%r9), %rax
2320         cmpq    $0, %rax
2321         jz      2f
2322 
2323         /*
2324          * reload args for the copyop
2325          */
2326         movq    (%rsp), %rdi
2327         movq    0x8(%rsp), %rsi
2328         movq    0x10(%rsp), %rdx
2329         movq    0x18(%rsp), %rcx
2330         leave
2331         movq    CP_COPYINSTR(%rax), %rax
2332         INDIRECT_JMP_REG(rax)
2333 
2334 2:      movl    $EFAULT, %eax           /* return EFAULT */
2335         leave
2336         ret
2337         SET_SIZE(copyinstr)
2338 
2339 #elif defined(__i386)
2340 
2341 #define ARG_UADDR       4
2342 #define ARG_KADDR       8
2343 
2344         ENTRY(copyinstr)
2345         movl    kernelbase, %ecx
2346 #ifdef DEBUG
2347         cmpl    %ecx, ARG_KADDR(%esp)
2348         jnb     1f
2349         pushl   %ebp
2350         movl    %esp, %ebp
2351         pushl   $.copyinstr_panic_msg
2352         call    panic
2353 1:
2354 #endif
2355         lea     _copyinstr_error, %eax
2356         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2357         jb      do_copystr
2358         movl    %gs:CPU_THREAD, %edx
2359         jmp     3f
2360 
2361 _copyinstr_error:
2362         popl    %edi
2363         movl    %gs:CPU_THREAD, %edx
2364         movl    %edi, T_LOFAULT(%edx)   /* original lofault */
2365 
2366         popl    %edi
2367         popl    %ebx
2368         popl    %ebp
2369 3:
2370         movl    T_COPYOPS(%edx), %eax
2371         cmpl    $0, %eax
2372         jz      2f
2373         jmp     *CP_COPYINSTR(%eax)
2374 
2375 2:      movl    $EFAULT, %eax           /* return EFAULT */
2376         ret
2377         SET_SIZE(copyinstr)
2378 
2379 #undef  ARG_UADDR
2380 #undef  ARG_KADDR
2381 
2382 #endif  /* __i386 */
2383 #endif  /* __lint */
2384 
2385 /*
2386  * Copy a null terminated string from the kernel
2387  * address space to the user address space.
2388  */
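
     /*
      * A usage sketch mirroring copyinstr() above ("kname", "ustr" and
      * "ulen" are hypothetical): copyoutstr() returns 0 on success, EFAULT
      * on a bad user address and ENAMETOOLONG if the string does not fit
      * within maxlength bytes.
      *
      *      size_t len;
      *      int err;
      *
      *      if ((err = copyoutstr(kname, ustr, ulen, &len)) != 0)
      *              return (err);
      */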
2389 
2390 #if defined(__lint)
2391 
2392 /* ARGSUSED */
2393 int
2394 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2395     size_t *lencopied)
2396 { return (0); }
2397 
2398 #else   /* __lint */
2399 
2400 #if defined(__amd64)
2401 
2402         ENTRY(copyoutstr)
2403         pushq   %rbp
2404         movq    %rsp, %rbp
2405         subq    $32, %rsp
2406 
2407         /*
2408          * save args in case we trap and need to rerun as a copyop
2409          */
2410         movq    %rdi, (%rsp)
2411         movq    %rsi, 0x8(%rsp)
2412         movq    %rdx, 0x10(%rsp)
2413         movq    %rcx, 0x18(%rsp)
2414 
2415         movq    kernelbase(%rip), %rax
2416 #ifdef DEBUG
2417         cmpq    %rax, %rdi              /* %rdi = kaddr */
2418         jnb     1f
2419         leaq    .copyoutstr_panic_msg(%rip), %rdi
2420         jmp     call_panic              /* setup stack and call panic */
2421 1:
2422 #endif
2423         /*
2424          * pass lofault value as 5th argument to do_copystr
2425          * pass 1 in %r10d as the non-ABI 6th argument (smap must be re-enabled)
2426          */
2427         leaq    _copyoutstr_error(%rip), %r8
2428         movl    $1, %r10d
2429 
2430         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
2431         jae     4f
2432         SMAP_DISABLE_INSTR(7)
2433         jmp     do_copystr
2434 4:
2435         movq    %gs:CPU_THREAD, %r9
2436         jmp     3f
2437 
2438 _copyoutstr_error:
2439         SMAP_ENABLE_INSTR(9)
2440         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2441 3:
2442         movq    T_COPYOPS(%r9), %rax
2443         cmpq    $0, %rax
2444         jz      2f
2445 
2446         /*
2447          * reload args for the copyop
2448          */
2449         movq    (%rsp), %rdi
2450         movq    0x8(%rsp), %rsi
2451         movq    0x10(%rsp), %rdx
2452         movq    0x18(%rsp), %rcx
2453         leave
2454         movq    CP_COPYOUTSTR(%rax), %rax
2455         INDIRECT_JMP_REG(rax)
2456 
2457 2:      movl    $EFAULT, %eax           /* return EFAULT */
2458         leave
2459         ret
2460         SET_SIZE(copyoutstr)
2461 
2462 #elif defined(__i386)
2463 
2464 #define ARG_KADDR       4
2465 #define ARG_UADDR       8
2466 
2467         ENTRY(copyoutstr)
2468         movl    kernelbase, %ecx
2469 #ifdef DEBUG
2470         cmpl    %ecx, ARG_KADDR(%esp)
2471         jnb     1f
2472         pushl   %ebp
2473         movl    %esp, %ebp
2474         pushl   $.copyoutstr_panic_msg
2475         call    panic
2476 1:
2477 #endif
2478         lea     _copyoutstr_error, %eax
2479         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2480         jb      do_copystr
2481         movl    %gs:CPU_THREAD, %edx
2482         jmp     3f
2483 
2484 _copyoutstr_error:
2485         popl    %edi
2486         movl    %gs:CPU_THREAD, %edx
2487         movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
2488 
2489         popl    %edi
2490         popl    %ebx
2491         popl    %ebp
2492 3:
2493         movl    T_COPYOPS(%edx), %eax
2494         cmpl    $0, %eax
2495         jz      2f
2496         jmp     *CP_COPYOUTSTR(%eax)
2497 
2498 2:      movl    $EFAULT, %eax           /* return EFAULT */
2499         ret
2500         SET_SIZE(copyoutstr)
2501 
2502 #undef  ARG_KADDR
2503 #undef  ARG_UADDR
2504 
2505 #endif  /* __i386 */
2506 #endif  /* __lint */
2507 
2508 /*
2509  * Since all of the fuword() variants are so similar, we have a macro to spit
2510  * them out.  This allows us to create DTrace-unobservable functions easily.
2511  */
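
     /*
      * A usage sketch (the user address "uaddr" is hypothetical): each
      * fuword variant fetches one word from user space, returning 0 on
      * success and -1 on failure.
      *
      *      uint32_t val;
      *
      *      if (fuword32((const void *)uaddr, &val) == -1)
      *              return (EFAULT);
      */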
2512 
2513 #if defined(__lint)
2514 
2515 #if defined(__amd64)
2516 
2517 /* ARGSUSED */
2518 int
2519 fuword64(const void *addr, uint64_t *dst)
2520 { return (0); }
2521 
2522 #endif
2523 
2524 /* ARGSUSED */
2525 int
2526 fuword32(const void *addr, uint32_t *dst)
2527 { return (0); }
2528 
2529 /* ARGSUSED */
2530 int
2531 fuword16(const void *addr, uint16_t *dst)
2532 { return (0); }
2533 
2534 /* ARGSUSED */
2535 int
2536 fuword8(const void *addr, uint8_t *dst)
2537 { return (0); }
2538 
2539 #else   /* __lint */
2540 
2541 #if defined(__amd64)
2542 
2543 /*
2544  * Note that we don't save and reload the arguments here
2545  * because their values are not altered in the copy path.
2546  * Additionally, when successful, the smap_enable jmp will
2547  * actually return us to our original caller.
2548  */
2549 
2550 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2551         ENTRY(NAME)                             \
2552         movq    %gs:CPU_THREAD, %r9;            \
2553         cmpq    kernelbase(%rip), %rdi;         \
2554         jae     1f;                             \
2555         leaq    _flt_/**/NAME, %rdx;            \
2556         movq    %rdx, T_LOFAULT(%r9);           \
2557         SMAP_DISABLE_INSTR(DISNUM)              \
2558         INSTR   (%rdi), REG;                    \
2559         movq    $0, T_LOFAULT(%r9);             \
2560         INSTR   REG, (%rsi);                    \
2561         xorl    %eax, %eax;                     \
2562         SMAP_ENABLE_INSTR(EN1)                  \
2563         ret;                                    \
2564 _flt_/**/NAME:                                  \
2565         SMAP_ENABLE_INSTR(EN2)                  \
2566         movq    $0, T_LOFAULT(%r9);             \
2567 1:                                              \
2568         movq    T_COPYOPS(%r9), %rax;           \
2569         cmpq    $0, %rax;                       \
2570         jz      2f;                             \
2571         movq    COPYOP(%rax), %rax;             \
2572         INDIRECT_JMP_REG(rax);                  \
2573 2:                                              \
2574         movl    $-1, %eax;                      \
2575         ret;                                    \
2576         SET_SIZE(NAME)
2577 
2578         FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2579         FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2580         FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2581         FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2582 
2583 #elif defined(__i386)
2584 
2585 #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2586         ENTRY(NAME)                             \
2587         movl    %gs:CPU_THREAD, %ecx;           \
2588         movl    kernelbase, %eax;               \
2589         cmpl    %eax, 4(%esp);                  \
2590         jae     1f;                             \
2591         lea     _flt_/**/NAME, %edx;            \
2592         movl    %edx, T_LOFAULT(%ecx);          \
2593         movl    4(%esp), %eax;                  \
2594         movl    8(%esp), %edx;                  \
2595         INSTR   (%eax), REG;                    \
2596         movl    $0, T_LOFAULT(%ecx);            \
2597         INSTR   REG, (%edx);                    \
2598         xorl    %eax, %eax;                     \
2599         ret;                                    \
2600 _flt_/**/NAME:                                  \
2601         movl    $0, T_LOFAULT(%ecx);            \
2602 1:                                              \
2603         movl    T_COPYOPS(%ecx), %eax;          \
2604         cmpl    $0, %eax;                       \
2605         jz      2f;                             \
2606         jmp     *COPYOP(%eax);                  \
2607 2:                                              \
2608         movl    $-1, %eax;                      \
2609         ret;                                    \
2610         SET_SIZE(NAME)
2611 
2612         FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2613         FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2614         FUWORD(fuword8, movb, %al, CP_FUWORD8)
2615 
2616 #endif  /* __i386 */
2617 
2618 #undef  FUWORD
2619 
2620 #endif  /* __lint */
2621 
2622 /*
2623  * Set user word.
2624  */
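
     /*
      * A usage sketch (the user address "uaddr" is hypothetical): the
      * suword variants store one word into user space, returning 0 on
      * success and -1 on failure.
      *
      *      if (suword32((void *)uaddr, 0) == -1)
      *              return (EFAULT);
      */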
2625 
2626 #if defined(__lint)
2627 
2628 #if defined(__amd64)
2629 
2630 /* ARGSUSED */
2631 int
2632 suword64(void *addr, uint64_t value)
2633 { return (0); }
2634 
2635 #endif
2636 
2637 /* ARGSUSED */
2638 int
2639 suword32(void *addr, uint32_t value)
2640 { return (0); }
2641 
2642 /* ARGSUSED */
2643 int
2644 suword16(void *addr, uint16_t value)
2645 { return (0); }
2646 
2647 /* ARGSUSED */
2648 int
2649 suword8(void *addr, uint8_t value)
2650 { return (0); }
2651 
2652 #else   /* lint */
2653 
2654 #if defined(__amd64)
2655 
2656 /*
2657  * Note that we don't save and reload the arguments here
2658  * because their values are not altered in the copy path.
2659  */
2660 
2661 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2662         ENTRY(NAME)                             \
2663         movq    %gs:CPU_THREAD, %r9;            \
2664         cmpq    kernelbase(%rip), %rdi;         \
2665         jae     1f;                             \
2666         leaq    _flt_/**/NAME, %rdx;            \
2667         SMAP_DISABLE_INSTR(DISNUM)              \
2668         movq    %rdx, T_LOFAULT(%r9);           \
2669         INSTR   REG, (%rdi);                    \
2670         movq    $0, T_LOFAULT(%r9);             \
2671         xorl    %eax, %eax;                     \
2672         SMAP_ENABLE_INSTR(EN1)                  \
2673         ret;                                    \
2674 _flt_/**/NAME:                                  \
2675         SMAP_ENABLE_INSTR(EN2)                  \
2676         movq    $0, T_LOFAULT(%r9);             \
2677 1:                                              \
2678         movq    T_COPYOPS(%r9), %rax;           \
2679         cmpq    $0, %rax;                       \
2680         jz      3f;                             \
2681         movq    COPYOP(%rax), %rax;             \
2682         INDIRECT_JMP_REG(rax);                  \
2683 3:                                              \
2684         movl    $-1, %eax;                      \
2685         ret;                                    \
2686         SET_SIZE(NAME)
2687 
2688         SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2689         SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2690         SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2691         SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2692 
2693 #elif defined(__i386)
2694 
2695 #define SUWORD(NAME, INSTR, REG, COPYOP)        \
2696         ENTRY(NAME)                             \
2697         movl    %gs:CPU_THREAD, %ecx;           \
2698         movl    kernelbase, %eax;               \
2699         cmpl    %eax, 4(%esp);                  \
2700         jae     1f;                             \
2701         lea     _flt_/**/NAME, %edx;            \
2702         movl    %edx, T_LOFAULT(%ecx);          \
2703         movl    4(%esp), %eax;                  \
2704         movl    8(%esp), %edx;                  \
2705         INSTR   REG, (%eax);                    \
2706         movl    $0, T_LOFAULT(%ecx);            \
2707         xorl    %eax, %eax;                     \
2708         ret;                                    \
2709 _flt_/**/NAME:                                  \
2710         movl    $0, T_LOFAULT(%ecx);            \
2711 1:                                              \
2712         movl    T_COPYOPS(%ecx), %eax;          \
2713         cmpl    $0, %eax;                       \
2714         jz      3f;                             \
2715         movl    COPYOP(%eax), %ecx;             \
2716         jmp     *%ecx;                          \
2717 3:                                              \
2718         movl    $-1, %eax;                      \
2719         ret;                                    \
2720         SET_SIZE(NAME)
2721 
2722         SUWORD(suword32, movl, %edx, CP_SUWORD32)
2723         SUWORD(suword16, movw, %dx, CP_SUWORD16)
2724         SUWORD(suword8, movb, %dl, CP_SUWORD8)
2725 
2726 #endif  /* __i386 */
2727 
2728 #undef  SUWORD
2729 
2730 #endif  /* __lint */
2731 
2732 #if defined(__lint)
2733 
2734 #if defined(__amd64)
2735 
2736 /*ARGSUSED*/
2737 void
2738 fuword64_noerr(const void *addr, uint64_t *dst)
2739 {}
2740 
2741 #endif
2742 
2743 /*ARGSUSED*/
2744 void
2745 fuword32_noerr(const void *addr, uint32_t *dst)
2746 {}
2747 
2748 /*ARGSUSED*/
2749 void
2750 fuword8_noerr(const void *addr, uint8_t *dst)
2751 {}
2752 
2753 /*ARGSUSED*/
2754 void
2755 fuword16_noerr(const void *addr, uint16_t *dst)
2756 {}
2757 
2758 #else   /* __lint */
2759 
2760 #if defined(__amd64)
2761 
2762 #define FUWORD_NOERR(NAME, INSTR, REG)          \
2763         ENTRY(NAME)                             \
2764         cmpq    kernelbase(%rip), %rdi;         \
2765         cmovnbq kernelbase(%rip), %rdi;         \
2766         INSTR   (%rdi), REG;                    \
2767         INSTR   REG, (%rsi);                    \
2768         ret;                                    \
2769         SET_SIZE(NAME)
2770 
2771         FUWORD_NOERR(fuword64_noerr, movq, %rax)
2772         FUWORD_NOERR(fuword32_noerr, movl, %eax)
2773         FUWORD_NOERR(fuword16_noerr, movw, %ax)
2774         FUWORD_NOERR(fuword8_noerr, movb, %al)
2775 
2776 #elif defined(__i386)
2777 
2778 #define FUWORD_NOERR(NAME, INSTR, REG)          \
2779         ENTRY(NAME)                             \
2780         movl    4(%esp), %eax;                  \
2781         cmpl    kernelbase, %eax;               \
2782         jb      1f;                             \
2783         movl    kernelbase, %eax;               \
2784 1:      movl    8(%esp), %edx;                  \
2785         INSTR   (%eax), REG;                    \
2786         INSTR   REG, (%edx);                    \
2787         ret;                                    \
2788         SET_SIZE(NAME)
2789 
2790         FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2791         FUWORD_NOERR(fuword16_noerr, movw, %cx)
2792         FUWORD_NOERR(fuword8_noerr, movb, %cl)
2793 
2794 #endif  /* __i386 */
2795 
2796 #undef  FUWORD_NOERR
2797 
2798 #endif  /* __lint */
2799 
2800 #if defined(__lint)
2801 
2802 #if defined(__amd64)
2803 
2804 /*ARGSUSED*/
2805 void
2806 suword64_noerr(void *addr, uint64_t value)
2807 {}
2808 
2809 #endif
2810 
2811 /*ARGSUSED*/
2812 void
2813 suword32_noerr(void *addr, uint32_t value)
2814 {}
2815 
2816 /*ARGSUSED*/
2817 void
2818 suword16_noerr(void *addr, uint16_t value)
2819 {}
2820 
2821 /*ARGSUSED*/
2822 void
2823 suword8_noerr(void *addr, uint8_t value)
2824 {}
2825 
2826 #else   /* lint */
2827 
2828 #if defined(__amd64)
2829 
2830 #define SUWORD_NOERR(NAME, INSTR, REG)          \
2831         ENTRY(NAME)                             \
2832         cmpq    kernelbase(%rip), %rdi;         \
2833         cmovnbq kernelbase(%rip), %rdi;         \
2834         INSTR   REG, (%rdi);                    \
2835         ret;                                    \
2836         SET_SIZE(NAME)
2837 
2838         SUWORD_NOERR(suword64_noerr, movq, %rsi)
2839         SUWORD_NOERR(suword32_noerr, movl, %esi)
2840         SUWORD_NOERR(suword16_noerr, movw, %si)
2841         SUWORD_NOERR(suword8_noerr, movb, %sil)
2842 
2843 #elif defined(__i386)
2844 
2845 #define SUWORD_NOERR(NAME, INSTR, REG)          \
2846         ENTRY(NAME)                             \
2847         movl    4(%esp), %eax;                  \
2848         cmpl    kernelbase, %eax;               \
2849         jb      1f;                             \
2850         movl    kernelbase, %eax;               \
2851 1:                                              \
2852         movl    8(%esp), %edx;                  \
2853         INSTR   REG, (%eax);                    \
2854         ret;                                    \
2855         SET_SIZE(NAME)
2856 
2857         SUWORD_NOERR(suword32_noerr, movl, %edx)
2858         SUWORD_NOERR(suword16_noerr, movw, %dx)
2859         SUWORD_NOERR(suword8_noerr, movb, %dl)
2860 
2861 #endif  /* __i386 */
2862 
2863 #undef  SUWORD_NOERR
2864 
2865 #endif  /* lint */
2866 
2867 
2868 #if defined(__lint)
2869 
2870 /*ARGSUSED*/
2871 int
2872 subyte(void *addr, uchar_t value)
2873 { return (0); }
2874 
2875 /*ARGSUSED*/
2876 void
2877 subyte_noerr(void *addr, uchar_t value)
2878 {}
2879 
2880 /*ARGSUSED*/
2881 int
2882 fulword(const void *addr, ulong_t *valuep)
2883 { return (0); }
2884 
2885 /*ARGSUSED*/
2886 void
2887 fulword_noerr(const void *addr, ulong_t *valuep)
2888 {}
2889 
2890 /*ARGSUSED*/
2891 int
2892 sulword(void *addr, ulong_t valuep)
2893 { return (0); }
2894 
2895 /*ARGSUSED*/
2896 void
2897 sulword_noerr(void *addr, ulong_t valuep)
2898 {}
2899 
2900 #else
2901 
2902         .weak   subyte
2903         subyte=suword8
2904         .weak   subyte_noerr
2905         subyte_noerr=suword8_noerr
2906 
2907 #if defined(__amd64)
2908 
2909         .weak   fulword
2910         fulword=fuword64
2911         .weak   fulword_noerr
2912         fulword_noerr=fuword64_noerr
2913         .weak   sulword
2914         sulword=suword64
2915         .weak   sulword_noerr
2916         sulword_noerr=suword64_noerr
2917 
2918 #elif defined(__i386)
2919 
2920         .weak   fulword
2921         fulword=fuword32
2922         .weak   fulword_noerr
2923         fulword_noerr=fuword32_noerr
2924         .weak   sulword
2925         sulword=suword32
2926         .weak   sulword_noerr
2927         sulword_noerr=suword32_noerr
2928 
2929 #endif /* __i386 */
2930 
2931 #endif /* __lint */
2932 
2933 #if defined(__lint)
2934 
2935 /*
2936  * Copy a block of storage - must not overlap (from + len <= to).
2937  * No fault handler installed (to be called under on_fault())
2938  */
2939 
2940 /* ARGSUSED */
2941 void
2942 copyout_noerr(const void *kfrom, void *uto, size_t count)
2943 {}
2944 
2945 /* ARGSUSED */
2946 void
2947 copyin_noerr(const void *ufrom, void *kto, size_t count)
2948 {}
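
     /*
      * A sketch of the expected calling pattern ("uaddr", "kbuf" and "len"
      * are hypothetical): since these routines install no lofault handler
      * of their own, the caller brackets them with on_fault()/no_fault()
      * and turns a fault into its own error.
      *
      *      label_t ljb;
      *
      *      if (on_fault(&ljb)) {
      *              no_fault();
      *              return (EFAULT);
      *      }
      *      copyin_noerr(uaddr, kbuf, len);
      *      no_fault();
      *      return (0);
      */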
2949 
2950 /*
2951  * Zero a block of storage in user space
2952  */
2953 
2954 /* ARGSUSED */
2955 void
2956 uzero(void *addr, size_t count)
2957 {}
2958 
2959 /*
2960  * copy a block of storage in user space
2961  */
2962 
2963 /* ARGSUSED */
2964 void
2965 ucopy(const void *ufrom, void *uto, size_t ulength)
2966 {}
2967 
2968 /*
2969  * copy a string in user space
2970  */
2971 
2972 /* ARGSUSED */
2973 void
2974 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2975 {}
2976 
2977 #else /* __lint */
2978 
2979 #if defined(__amd64)
2980 
2981         ENTRY(copyin_noerr)
2982         movq    kernelbase(%rip), %rax
2983 #ifdef DEBUG
2984         cmpq    %rax, %rsi              /* %rsi = kto */
2985         jae     1f
2986         leaq    .cpyin_ne_pmsg(%rip), %rdi
2987         jmp     call_panic              /* setup stack and call panic */
2988 1:
2989 #endif
2990         cmpq    %rax, %rdi              /* ufrom < kernelbase */
2991         jb      do_copy
2992         movq    %rax, %rdi              /* force fault at kernelbase */
2993         jmp     do_copy
2994         SET_SIZE(copyin_noerr)
2995 
2996         ENTRY(copyout_noerr)
2997         movq    kernelbase(%rip), %rax
2998 #ifdef DEBUG
2999         cmpq    %rax, %rdi              /* %rdi = kfrom */
3000         jae     1f
3001         leaq    .cpyout_ne_pmsg(%rip), %rdi
3002         jmp     call_panic              /* setup stack and call panic */
3003 1:
3004 #endif
3005         cmpq    %rax, %rsi              /* uto < kernelbase */
3006         jb      do_copy
3007         movq    %rax, %rsi              /* force fault at kernelbase */
3008         jmp     do_copy
3009         SET_SIZE(copyout_noerr)
3010 
3011         ENTRY(uzero)
3012         movq    kernelbase(%rip), %rax
3013         cmpq    %rax, %rdi
3014         jb      do_zero
3015         movq    %rax, %rdi      /* force fault at kernelbase */
3016         jmp     do_zero
3017         SET_SIZE(uzero)
3018 
3019         ENTRY(ucopy)
3020         movq    kernelbase(%rip), %rax
3021         cmpq    %rax, %rdi
3022         cmovaeq %rax, %rdi      /* force fault at kernelbase */
3023         cmpq    %rax, %rsi
3024         cmovaeq %rax, %rsi      /* force fault at kernelbase */
3025         jmp     do_copy
3026         SET_SIZE(ucopy)
3027 
3028         /*
3029          * Note, the frame pointer is required here because do_copystr expects
3030          * to be able to pop it off!
3031          */
3032         ENTRY(ucopystr)
3033         pushq   %rbp
3034         movq    %rsp, %rbp
3035         movq    kernelbase(%rip), %rax
3036         cmpq    %rax, %rdi
3037         cmovaeq %rax, %rdi      /* force fault at kernelbase */
3038         cmpq    %rax, %rsi
3039         cmovaeq %rax, %rsi      /* force fault at kernelbase */
3040         /* do_copystr expects lofault address in %r8 */
3041         /* do_copystr expects whether or not we need smap in %r10 */
3042         xorl    %r10d, %r10d
3043         movq    %gs:CPU_THREAD, %r8
3044         movq    T_LOFAULT(%r8), %r8
3045         jmp     do_copystr
3046         SET_SIZE(ucopystr)
3047 
3048 #elif defined(__i386)
3049 
3050         ENTRY(copyin_noerr)
3051         movl    kernelbase, %eax
3052 #ifdef DEBUG
3053         cmpl    %eax, 8(%esp)
3054         jae     1f
3055         pushl   $.cpyin_ne_pmsg
3056         call    panic
3057 1:
3058 #endif
3059         cmpl    %eax, 4(%esp)
3060         jb      do_copy
3061         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3062         jmp     do_copy
3063         SET_SIZE(copyin_noerr)
3064 
3065         ENTRY(copyout_noerr)
3066         movl    kernelbase, %eax
3067 #ifdef DEBUG
3068         cmpl    %eax, 4(%esp)
3069         jae     1f
3070         pushl   $.cpyout_ne_pmsg
3071         call    panic
3072 1:
3073 #endif
3074         cmpl    %eax, 8(%esp)
3075         jb      do_copy
3076         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3077         jmp     do_copy
3078         SET_SIZE(copyout_noerr)
3079 
3080         ENTRY(uzero)
3081         movl    kernelbase, %eax
3082         cmpl    %eax, 4(%esp)
3083         jb      do_zero
3084         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3085         jmp     do_zero
3086         SET_SIZE(uzero)
3087 
3088         ENTRY(ucopy)
3089         movl    kernelbase, %eax
3090         cmpl    %eax, 4(%esp)
3091         jb      1f
3092         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3093 1:
3094         cmpl    %eax, 8(%esp)
3095         jb      do_copy
3096         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3097         jmp     do_copy
3098         SET_SIZE(ucopy)
3099 
3100         ENTRY(ucopystr)
3101         movl    kernelbase, %eax
3102         cmpl    %eax, 4(%esp)
3103         jb      1f
3104         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3105 1:
3106         cmpl    %eax, 8(%esp)
3107         jb      2f
3108         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3109 2:
3110         /* do_copystr expects the lofault address in %eax */
3111         movl    %gs:CPU_THREAD, %eax
3112         movl    T_LOFAULT(%eax), %eax
3113         jmp     do_copystr
3114         SET_SIZE(ucopystr)
3115 
3116 #endif  /* __i386 */
3117 
3118 #ifdef DEBUG
3119         .data
3120 .kcopy_panic_msg:
3121         .string "kcopy: arguments below kernelbase"
3122 .bcopy_panic_msg:
3123         .string "bcopy: arguments below kernelbase"
3124 .kzero_panic_msg:
3125         .string "kzero: arguments below kernelbase"
3126 .bzero_panic_msg:
3127         .string "bzero: arguments below kernelbase"
3128 .copyin_panic_msg:
3129         .string "copyin: kaddr argument below kernelbase"
3130 .xcopyin_panic_msg:
3131         .string "xcopyin: kaddr argument below kernelbase"
3132 .copyout_panic_msg:
3133         .string "copyout: kaddr argument below kernelbase"
3134 .xcopyout_panic_msg:
3135         .string "xcopyout: kaddr argument below kernelbase"
3136 .copystr_panic_msg:
3137         .string "copystr: arguments in user space"
3138 .copyinstr_panic_msg:
3139         .string "copyinstr: kaddr argument not in kernel address space"
3140 .copyoutstr_panic_msg:
3141         .string "copyoutstr: kaddr argument not in kernel address space"
3142 .cpyin_ne_pmsg:
3143         .string "copyin_noerr: argument not in kernel address space"
3144 .cpyout_ne_pmsg:
3145         .string "copyout_noerr: argument not in kernel address space"
3146 #endif
3147 
3148 #endif  /* __lint */
3149 
3150 /*
3151  * These functions are used for SMAP, supervisor mode access protection. They
3152  * are hotpatched to become real instructions when the system starts up; this
3153  * is done in mlsetup() as a part of enabling the other CR4 related features.
3154  *
3155  * Generally speaking, smap_disable() is a stac instruction and smap_enable()
3156  * is a clac instruction. It's safe to call these any number of times, and in
3157  * fact, out of paranoia, the kernel will likely call them at several points.
3158  */
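
     /*
      * For reference, a sketch of the instruction bytes the hotpatch is
      * expected to write over the three-byte nop pads below (the patching
      * itself lives outside this file; the array names are illustrative
      * only):
      *
      *      static const uint8_t stac_bytes[3] = { 0x0f, 0x01, 0xcb };
      *      static const uint8_t clac_bytes[3] = { 0x0f, 0x01, 0xca };
      *
      * so that smap_disable() becomes stac; ret and smap_enable() becomes
      * clac; ret.
      */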
3159 
3160 #if defined(__lint)
3161 
3162 void
3163 smap_enable(void)
3164 {}
3165 
3166 void
3167 smap_disable(void)
3168 {}
3169 
3170 #else
3171 
3172 #if defined(__amd64) || defined(__i386)
3173         ENTRY(smap_disable)
3174         nop
3175         nop
3176         nop
3177         ret
3178         SET_SIZE(smap_disable)
3179 
3180         ENTRY(smap_enable)
3181         nop
3182         nop
3183         nop
3184         ret
3185         SET_SIZE(smap_enable)
3186 
3187 #endif /* __amd64 || __i386 */
3188 
3189 #endif /* __lint */
3190 
3191 #ifndef __lint
3192 
3193 .data
3194 .align  4
3195 .globl  _smap_enable_patch_count
3196 .type   _smap_enable_patch_count,@object
3197 .size   _smap_enable_patch_count, 4
3198 _smap_enable_patch_count:
3199         .long   SMAP_ENABLE_COUNT
3200 
3201 .globl  _smap_disable_patch_count
3202 .type   _smap_disable_patch_count,@object
3203 .size   _smap_disable_patch_count, 4
3204 _smap_disable_patch_count:
3205         .long SMAP_DISABLE_COUNT
3206 
3207 #endif /* __lint */