1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright (c) 2018 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #if defined(__lint)
  46 #include <sys/types.h>
  47 #include <sys/systm.h>
  48 #else   /* __lint */
  49 #include "assym.h"
  50 #endif  /* __lint */
  51 
  52 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  53 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
/*
 * Non-temporal access (NTA) alignment requirement
 */
  57 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  58 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  59 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
  60 #define COUNT_ALIGN_MASK        _CONST(COUNT_ALIGN_SIZE-1)
  61 
/*
 * With Broadwell, Intel introduced supervisor mode access protection -- SMAP.
 * SMAP forces the kernel to set certain bits to enable access to user pages
 * (AC in rflags, defined as PS_ACHK in <sys/psw.h>). One of the challenges is
 * that the implementation of many of the userland copy routines directly
 * uses the kernel ones. For example, copyin and copyout simply jump to the
 * do_copy_fault label and traditionally let it deal with the return for them.
 * In fact, changing that is a can of frame pointers.
 *
 * Rules and Constraints:
 *
 * 1. For anything that's not in copy.s, we have it do explicit calls to the
 * smap related code. It usually is in a position where it is able to. This is
 * restricted to the following three places: DTrace, resume() in swtch.s and
 * on_fault/no_fault. If you want to add it somewhere else, we should be
 * thinking twice.
 *
 * 2. We try to toggle this within the smallest window possible. This means
 * that if we take a fault or need to fall back to a copyop in copyin(),
 * copyout(), or any other function, we always leave with SMAP enabled (the
 * kernel cannot access user pages).
 *
 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They
 * are explicitly only allowed to be called while in an on_fault()/no_fault()
 * handler, which already takes care of ensuring that SMAP is enabled and
 * disabled. Note this means that when under an on_fault()/no_fault() handler,
 * one must not call the non-*_noerr() routines.
 *
 * 4. The first thing we should do after coming out of a lofault handler is to
 * call smap_enable() again to ensure that we are safely protected, as more
 * often than not, we will have disabled SMAP to get there.
 *
 * 5. The SMAP functions, smap_enable and smap_disable, may not touch any
 * registers beyond those used by the call and ret themselves. These routines
 * may be called from arbitrary contexts in copy.s where we have slightly more
 * special ABIs in place.
 *
 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 * SMAP_DISABLE_INSTR macros should be used (except for smap_enable() and
 * smap_disable()). If the number of these is changed, you must update the
 * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below. An illustration
 * of their use follows this comment block.
 *
 * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There
 * is no known technical reason preventing it from being enabled.
 *
 * 8. Generally this .s file is processed by a K&R style cpp. This means that
 * it really has a lot of feelings about whitespace. In particular, if you
 * have a macro FOO with the arguments FOO(1, 3), the second argument is in
 * fact ' 3'.
 *
 * 9. The smap_enable and smap_disable functions should not generally be
 * called. They exist only so that DTrace and on_trap() may use them.
 *
 * 10. In general, the kernel has its own value for rflags that gets used.
 * This is maintained in a few different places which vary based on how the
 * thread comes into existence and whether it's a user thread. In general,
 * when the kernel takes a trap, it will always set rflags to a known state,
 * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
 * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
 * we mask off PS_ACHK via the AMD_SFMASK MSR. See init_cpu_syscall() for
 * where that gets masked off.
 */
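
/*
 * As a hedged illustration of rules 2 and 6 (this exact sequence does not
 * appear verbatim in this file), an inline user of SMAP brackets its
 * user-memory access with the patchable slots, picking the next unused
 * indices and bumping SMAP_DISABLE_COUNT/SMAP_ENABLE_COUNT to match:
 *
 *	SMAP_DISABLE_INSTR(n)		user access becomes legal here
 *	movq	(%rdi), %rax		touch user memory
 *	SMAP_ENABLE_INSTR(m)		user access is blocked again
 */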
 124 
/*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers. For the best performance
 * on these small sizes, unrolled code is used. For medium sizes, loops that
 * write 64 bytes per iteration are used. The transition points were
 * determined experimentally; a C sketch of this dispatch follows the
 * threshold definitions below.
 */
 132 #define BZERO_USE_REP   (1024)
 133 #define BCOPY_DFLT_REP  (128)
 134 #define BCOPY_NHM_REP   (768)
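
/*
 * A minimal C sketch of the size dispatch described above (illustrative
 * only: the real selection is done in assembly below, the 80-byte cutoff for
 * the unrolled code is hard-coded there, and rep_min stands for
 * BCOPY_DFLT_REP or, after boot-time patching, BCOPY_NHM_REP):
 *
 *	enum strategy { UNROLLED, LOOP64, REP_MOVS };
 *
 *	enum strategy
 *	bcopy_strategy(size_t n, size_t rep_min)
 *	{
 *		if (n < 0x50)
 *			return (UNROLLED);	jump table of small copies
 *		if (n < rep_min)
 *			return (LOOP64);	64 bytes per loop iteration
 *		return (REP_MOVS);		"rep smovq"
 *	}
 */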
 135 
/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns an errno value on a pagefault error, 0 if all is ok.
 */
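
/*
 * As a hedged usage illustration (not code from this file): a caller that
 * must tolerate an unresolvable kernel pagefault uses kcopy() instead of
 * bcopy() and checks the return value, e.g.
 *
 *	if (kcopy(src, dst, len) != 0)
 *		return (EFAULT);
 *
 * bcopy() has no lofault protection, so the same fault inside it would be
 * fatal.
 */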
 141 
 142 /*
 143  * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 144  * additional call instructions.
 145  */
 146 #if defined(__amd64)
 147 #define SMAP_DISABLE_COUNT      16
 148 #define SMAP_ENABLE_COUNT       26
 149 #elif defined(__i386)
 150 #define SMAP_DISABLE_COUNT      0
 151 #define SMAP_ENABLE_COUNT       0
 152 #endif
 153 
 154 #define SMAP_DISABLE_INSTR(ITER)                \
 155         .globl  _smap_disable_patch_/**/ITER;   \
 156         _smap_disable_patch_/**/ITER/**/:;      \
 157         nop; nop; nop;
 158 
 159 #define SMAP_ENABLE_INSTR(ITER)                 \
 160         .globl  _smap_enable_patch_/**/ITER;    \
 161         _smap_enable_patch_/**/ITER/**/:;       \
 162         nop; nop; nop;
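
/*
 * Each macro above emits a named patch label followed by a three-byte nop
 * window. The window is the same size as the three-byte SMAP toggling
 * opcodes, and boot code is expected to rewrite each _smap_*_patch_ site in
 * place when the processor supports SMAP; the SMAP_*_COUNT constants above
 * tell it how many sites to expect.
 */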
 163 
 164 #if defined(__lint)
 165 
 166 /* ARGSUSED */
 167 int
 168 kcopy(const void *from, void *to, size_t count)
 169 { return (0); }
 170 
 171 #else   /* __lint */
 172 
 173         .globl  kernelbase
 174         .globl  postbootkernelbase
 175 
 176 #if defined(__amd64)
 177 
 178         ENTRY(kcopy)
 179         pushq   %rbp
 180         movq    %rsp, %rbp
 181 #ifdef DEBUG
 182         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 183         jb      0f
 184         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 185         jnb     1f
 186 0:      leaq    .kcopy_panic_msg(%rip), %rdi
 187         xorl    %eax, %eax
 188         call    panic
 189 1:
 190 #endif
 191         /*
 192          * pass lofault value as 4th argument to do_copy_fault
 193          */
 194         leaq    _kcopy_copyerr(%rip), %rcx
 195         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 196 
 197 do_copy_fault:
 198         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 199         movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 200         call    bcopy_altentry
 201         xorl    %eax, %eax              /* return 0 (success) */
 202         SMAP_ENABLE_INSTR(0)
 203 
 204         /*
 205          * A fault during do_copy_fault is indicated through an errno value
 206          * in %rax and we iretq from the trap handler to here.
 207          */
 208 _kcopy_copyerr:
 209         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 210         leave
 211         ret
 212         SET_SIZE(kcopy)
 213 
 214 #elif defined(__i386)
 215 
 216 #define ARG_FROM        8
 217 #define ARG_TO          12
 218 #define ARG_COUNT       16
 219 
 220         ENTRY(kcopy)
 221 #ifdef DEBUG
 222         pushl   %ebp
 223         movl    %esp, %ebp
 224         movl    postbootkernelbase, %eax
 225         cmpl    %eax, ARG_FROM(%ebp)
 226         jb      0f
 227         cmpl    %eax, ARG_TO(%ebp)
 228         jnb     1f
 229 0:      pushl   $.kcopy_panic_msg
 230         call    panic
 231 1:      popl    %ebp
 232 #endif
 233         lea     _kcopy_copyerr, %eax    /* lofault value */
 234         movl    %gs:CPU_THREAD, %edx    
 235 
 236 do_copy_fault:
 237         pushl   %ebp
 238         movl    %esp, %ebp              /* setup stack frame */
 239         pushl   %esi
 240         pushl   %edi                    /* save registers */
 241 
 242         movl    T_LOFAULT(%edx), %edi
 243         pushl   %edi                    /* save the current lofault */
 244         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 245 
 246         movl    ARG_COUNT(%ebp), %ecx
 247         movl    ARG_FROM(%ebp), %esi
 248         movl    ARG_TO(%ebp), %edi
 249         shrl    $2, %ecx                /* word count */
 250         rep
 251           smovl
 252         movl    ARG_COUNT(%ebp), %ecx
 253         andl    $3, %ecx                /* bytes left over */
 254         rep
 255           smovb
 256         xorl    %eax, %eax
 257 
 258         /*
 259          * A fault during do_copy_fault is indicated through an errno value
 260          * in %eax and we iret from the trap handler to here.
 261          */
 262 _kcopy_copyerr:
 263         popl    %ecx
 264         popl    %edi
 265         movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 266         popl    %esi
 267         popl    %ebp
 268         ret
 269         SET_SIZE(kcopy)
 270 
 271 #undef  ARG_FROM
 272 #undef  ARG_TO
 273 #undef  ARG_COUNT
 274 
 275 #endif  /* __i386 */
 276 #endif  /* __lint */
 277 
 278 #if defined(__lint)
 279 
 280 /*
 281  * Copy a block of storage.  Similar to kcopy but uses non-temporal
 282  * instructions.
 283  */
 284 
 285 /* ARGSUSED */
 286 int
 287 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
 288 { return (0); }
 289 
 290 #else   /* __lint */
 291 
 292 #if defined(__amd64)
 293 
 294 #define COPY_LOOP_INIT(src, dst, cnt)   \
 295         addq    cnt, src;                       \
 296         addq    cnt, dst;                       \
 297         shrq    $3, cnt;                        \
 298         neg     cnt
 299 
 300         /* Copy 16 bytes per loop.  Uses %rax and %r8 */
 301 #define COPY_LOOP_BODY(src, dst, cnt)   \
 302         prefetchnta     0x100(src, cnt, 8);     \
 303         movq    (src, cnt, 8), %rax;            \
 304         movq    0x8(src, cnt, 8), %r8;          \
 305         movnti  %rax, (dst, cnt, 8);            \
 306         movnti  %r8, 0x8(dst, cnt, 8);          \
 307         addq    $2, cnt
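
	/*
	 * A hedged C rendering of the loop idiom set up by COPY_LOOP_INIT
	 * and COPY_LOOP_BODY (illustrative only; it assumes the byte count
	 * is a multiple of 16, which the COUNT_ALIGN_SIZE check guarantees).
	 * Both pointers are advanced to the end of the buffer and a negative
	 * qword index is walked up to zero, so the loop condition is simply
	 * "index != 0":
	 *
	 *	void
	 *	nta_loop_sketch(const uint64_t *src, uint64_t *dst,
	 *	    size_t bytes)
	 *	{
	 *		ssize_t i = -(ssize_t)(bytes / 8);
	 *
	 *		src += bytes / 8;
	 *		dst += bytes / 8;
	 *		for (; i != 0; i += 2) {
	 *			dst[i] = src[i];	movnti + prefetchnta
	 *			dst[i + 1] = src[i + 1];   in the real code
	 *		}
	 *	}
	 */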
 308 
 309         ENTRY(kcopy_nta)
 310         pushq   %rbp
 311         movq    %rsp, %rbp
 312 #ifdef DEBUG
 313         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 314         jb      0f
 315         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */
 316         jnb     1f
 317 0:      leaq    .kcopy_panic_msg(%rip), %rdi
 318         xorl    %eax, %eax
 319         call    panic
 320 1:
 321 #endif
 322 
 323         movq    %gs:CPU_THREAD, %r9
 324         cmpq    $0, %rcx                /* No non-temporal access? */
 325         /*
 326          * pass lofault value as 4th argument to do_copy_fault
 327          */
 328         leaq    _kcopy_nta_copyerr(%rip), %rcx  /* doesn't set rflags */
 329         jnz     do_copy_fault           /* use regular access */
 330         /*
 331          * Make sure cnt is >= KCOPY_MIN_SIZE
 332          */
 333         cmpq    $KCOPY_MIN_SIZE, %rdx
 334         jb      do_copy_fault
 335 
 336         /*
 337          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
 338          * count is COUNT_ALIGN_SIZE aligned.
 339          */
 340         movq    %rdi, %r10
 341         orq     %rsi, %r10
 342         andq    $NTA_ALIGN_MASK, %r10
 343         orq     %rdx, %r10
 344         andq    $COUNT_ALIGN_MASK, %r10
 345         jnz     do_copy_fault
 346 
 347         ALTENTRY(do_copy_fault_nta)
 348         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
 349         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
 350         movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
 351 
 352         /*
 353          * COPY_LOOP_BODY uses %rax and %r8
 354          */
 355         COPY_LOOP_INIT(%rdi, %rsi, %rdx)
 356 2:      COPY_LOOP_BODY(%rdi, %rsi, %rdx)
 357         jnz     2b
 358 
 359         mfence
 360         xorl    %eax, %eax              /* return 0 (success) */
 361         SMAP_ENABLE_INSTR(1)
 362 
 363 _kcopy_nta_copyerr:
 364         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
 365         leave
 366         ret
 367         SET_SIZE(do_copy_fault_nta)
 368         SET_SIZE(kcopy_nta)
 369 
 370 #elif defined(__i386)
 371 
 372 #define ARG_FROM        8
 373 #define ARG_TO          12
 374 #define ARG_COUNT       16
 375 
 376 #define COPY_LOOP_INIT(src, dst, cnt)   \
 377         addl    cnt, src;                       \
 378         addl    cnt, dst;                       \
 379         shrl    $3, cnt;                        \
 380         neg     cnt
 381 
 382 #define COPY_LOOP_BODY(src, dst, cnt)   \
 383         prefetchnta     0x100(src, cnt, 8);     \
 384         movl    (src, cnt, 8), %esi;            \
 385         movnti  %esi, (dst, cnt, 8);            \
 386         movl    0x4(src, cnt, 8), %esi;         \
 387         movnti  %esi, 0x4(dst, cnt, 8);         \
 388         movl    0x8(src, cnt, 8), %esi;         \
 389         movnti  %esi, 0x8(dst, cnt, 8);         \
 390         movl    0xc(src, cnt, 8), %esi;         \
 391         movnti  %esi, 0xc(dst, cnt, 8);         \
 392         addl    $2, cnt
 393 
 394         /*
 395          * kcopy_nta is not implemented for 32-bit as no performance
 396          * improvement was shown.  We simply jump directly to kcopy
 397          * and discard the 4 arguments.
 398          */
 399         ENTRY(kcopy_nta)
 400         jmp     kcopy
 401 
 402         lea     _kcopy_nta_copyerr, %eax        /* lofault value */
 403         ALTENTRY(do_copy_fault_nta)
 404         pushl   %ebp
 405         movl    %esp, %ebp              /* setup stack frame */
 406         pushl   %esi
 407         pushl   %edi
 408 
 409         movl    %gs:CPU_THREAD, %edx    
 410         movl    T_LOFAULT(%edx), %edi
 411         pushl   %edi                    /* save the current lofault */
 412         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
 413 
 414         /* COPY_LOOP_BODY needs to use %esi */
 415         movl    ARG_COUNT(%ebp), %ecx
 416         movl    ARG_FROM(%ebp), %edi
 417         movl    ARG_TO(%ebp), %eax
 418         COPY_LOOP_INIT(%edi, %eax, %ecx)
 419 1:      COPY_LOOP_BODY(%edi, %eax, %ecx)
 420         jnz     1b
 421         mfence
 422 
 423         xorl    %eax, %eax
 424 _kcopy_nta_copyerr:
 425         popl    %ecx
 426         popl    %edi
 427         movl    %ecx, T_LOFAULT(%edx)   /* restore the original lofault */
 428         popl    %esi
 429         leave
 430         ret
 431         SET_SIZE(do_copy_fault_nta)
 432         SET_SIZE(kcopy_nta)
 433 
 434 #undef  ARG_FROM
 435 #undef  ARG_TO
 436 #undef  ARG_COUNT
 437 
 438 #endif  /* __i386 */
 439 #endif  /* __lint */
 440 
 441 #if defined(__lint)
 442 
 443 /* ARGSUSED */
 444 void
 445 bcopy(const void *from, void *to, size_t count)
 446 {}
 447 
 448 #else   /* __lint */
 449 
 450 #if defined(__amd64)
 451 
 452         ENTRY(bcopy)
 453 #ifdef DEBUG
 454         orq     %rdx, %rdx              /* %rdx = count */
 455         jz      1f
 456         cmpq    postbootkernelbase(%rip), %rdi          /* %rdi = from */
 457         jb      0f
 458         cmpq    postbootkernelbase(%rip), %rsi          /* %rsi = to */         
 459         jnb     1f
 460 0:      leaq    .bcopy_panic_msg(%rip), %rdi
 461         jmp     call_panic              /* setup stack and call panic */
 462 1:
 463 #endif
	/*
	 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
	 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
	 * uses these registers in the future, they must be saved and
	 * restored.
	 */
 469         ALTENTRY(bcopy_altentry)
 470 do_copy:
 471 #define L(s) .bcopy/**/s
 472         cmpq    $0x50, %rdx             /* 80 */
 473         jae     bcopy_ck_size
 474 
	/*
	 * Performance data shows many callers copy small buffers. So for the
	 * best performance on these sizes, unrolled code is used. Store data
	 * without worrying about alignment.
	 */
 480         leaq    L(fwdPxQx)(%rip), %r10
 481         addq    %rdx, %rdi
 482         addq    %rdx, %rsi
 483         movslq  (%r10,%rdx,4), %rcx
 484         leaq    (%rcx,%r10,1), %r10
 485         jmpq    *%r10
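
	/*
	 * L(fwdPxQx) below is a table of 80 signed 32-bit offsets, one per
	 * byte count 0 through 79; the entry for count c is
	 * L(P(c%8)Q(c/8)) - L(fwdPxQx). The movslq/leaq/jmpq sequence above
	 * turns the offset back into an absolute address, and because %rdi
	 * and %rsi have already been advanced by the count, each unrolled
	 * target copies backwards from the end of the buffer and falls
	 * through to the smaller cases before returning.
	 */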
 486 
 487         .p2align 4
 488 L(fwdPxQx):
 489         .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490         .int       L(P1Q0)-L(fwdPxQx)
 491         .int       L(P2Q0)-L(fwdPxQx)
 492         .int       L(P3Q0)-L(fwdPxQx)
 493         .int       L(P4Q0)-L(fwdPxQx)
 494         .int       L(P5Q0)-L(fwdPxQx)
 495         .int       L(P6Q0)-L(fwdPxQx)
 496         .int       L(P7Q0)-L(fwdPxQx) 
 497 
 498         .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499         .int       L(P1Q1)-L(fwdPxQx)
 500         .int       L(P2Q1)-L(fwdPxQx)
 501         .int       L(P3Q1)-L(fwdPxQx)
 502         .int       L(P4Q1)-L(fwdPxQx)
 503         .int       L(P5Q1)-L(fwdPxQx)
 504         .int       L(P6Q1)-L(fwdPxQx)
 505         .int       L(P7Q1)-L(fwdPxQx) 
 506 
 507         .int       L(P0Q2)-L(fwdPxQx)   /* 16 */
 508         .int       L(P1Q2)-L(fwdPxQx)
 509         .int       L(P2Q2)-L(fwdPxQx)
 510         .int       L(P3Q2)-L(fwdPxQx)
 511         .int       L(P4Q2)-L(fwdPxQx)
 512         .int       L(P5Q2)-L(fwdPxQx)
 513         .int       L(P6Q2)-L(fwdPxQx)
 514         .int       L(P7Q2)-L(fwdPxQx) 
 515 
 516         .int       L(P0Q3)-L(fwdPxQx)   /* 24 */
 517         .int       L(P1Q3)-L(fwdPxQx)
 518         .int       L(P2Q3)-L(fwdPxQx)
 519         .int       L(P3Q3)-L(fwdPxQx)
 520         .int       L(P4Q3)-L(fwdPxQx)
 521         .int       L(P5Q3)-L(fwdPxQx)
 522         .int       L(P6Q3)-L(fwdPxQx)
 523         .int       L(P7Q3)-L(fwdPxQx) 
 524 
 525         .int       L(P0Q4)-L(fwdPxQx)   /* 32 */
 526         .int       L(P1Q4)-L(fwdPxQx)
 527         .int       L(P2Q4)-L(fwdPxQx)
 528         .int       L(P3Q4)-L(fwdPxQx)
 529         .int       L(P4Q4)-L(fwdPxQx)
 530         .int       L(P5Q4)-L(fwdPxQx)
 531         .int       L(P6Q4)-L(fwdPxQx)
 532         .int       L(P7Q4)-L(fwdPxQx) 
 533 
 534         .int       L(P0Q5)-L(fwdPxQx)   /* 40 */
 535         .int       L(P1Q5)-L(fwdPxQx)
 536         .int       L(P2Q5)-L(fwdPxQx)
 537         .int       L(P3Q5)-L(fwdPxQx)
 538         .int       L(P4Q5)-L(fwdPxQx)
 539         .int       L(P5Q5)-L(fwdPxQx)
 540         .int       L(P6Q5)-L(fwdPxQx)
 541         .int       L(P7Q5)-L(fwdPxQx) 
 542 
 543         .int       L(P0Q6)-L(fwdPxQx)   /* 48 */
 544         .int       L(P1Q6)-L(fwdPxQx)
 545         .int       L(P2Q6)-L(fwdPxQx)
 546         .int       L(P3Q6)-L(fwdPxQx)
 547         .int       L(P4Q6)-L(fwdPxQx)
 548         .int       L(P5Q6)-L(fwdPxQx)
 549         .int       L(P6Q6)-L(fwdPxQx)
 550         .int       L(P7Q6)-L(fwdPxQx) 
 551 
 552         .int       L(P0Q7)-L(fwdPxQx)   /* 56 */
 553         .int       L(P1Q7)-L(fwdPxQx)
 554         .int       L(P2Q7)-L(fwdPxQx)
 555         .int       L(P3Q7)-L(fwdPxQx)
 556         .int       L(P4Q7)-L(fwdPxQx)
 557         .int       L(P5Q7)-L(fwdPxQx)
 558         .int       L(P6Q7)-L(fwdPxQx)
 559         .int       L(P7Q7)-L(fwdPxQx) 
 560 
 561         .int       L(P0Q8)-L(fwdPxQx)   /* 64 */
 562         .int       L(P1Q8)-L(fwdPxQx)
 563         .int       L(P2Q8)-L(fwdPxQx)
 564         .int       L(P3Q8)-L(fwdPxQx)
 565         .int       L(P4Q8)-L(fwdPxQx)
 566         .int       L(P5Q8)-L(fwdPxQx)
 567         .int       L(P6Q8)-L(fwdPxQx)
 568         .int       L(P7Q8)-L(fwdPxQx)
 569 
 570         .int       L(P0Q9)-L(fwdPxQx)   /* 72 */
 571         .int       L(P1Q9)-L(fwdPxQx)
 572         .int       L(P2Q9)-L(fwdPxQx)
 573         .int       L(P3Q9)-L(fwdPxQx)
 574         .int       L(P4Q9)-L(fwdPxQx)
 575         .int       L(P5Q9)-L(fwdPxQx)
 576         .int       L(P6Q9)-L(fwdPxQx)
 577         .int       L(P7Q9)-L(fwdPxQx)   /* 79 */
 578 
 579         .p2align 4
 580 L(P0Q9):
 581         mov    -0x48(%rdi), %rcx
 582         mov    %rcx, -0x48(%rsi)
 583 L(P0Q8):
 584         mov    -0x40(%rdi), %r10
 585         mov    %r10, -0x40(%rsi)
 586 L(P0Q7):
 587         mov    -0x38(%rdi), %r8
 588         mov    %r8, -0x38(%rsi)
 589 L(P0Q6):
 590         mov    -0x30(%rdi), %rcx
 591         mov    %rcx, -0x30(%rsi)
 592 L(P0Q5):
 593         mov    -0x28(%rdi), %r10
 594         mov    %r10, -0x28(%rsi)
 595 L(P0Q4):
 596         mov    -0x20(%rdi), %r8
 597         mov    %r8, -0x20(%rsi)
 598 L(P0Q3):
 599         mov    -0x18(%rdi), %rcx
 600         mov    %rcx, -0x18(%rsi)
 601 L(P0Q2):
 602         mov    -0x10(%rdi), %r10
 603         mov    %r10, -0x10(%rsi)
 604 L(P0Q1):
 605         mov    -0x8(%rdi), %r8
 606         mov    %r8, -0x8(%rsi)
 607 L(P0Q0):                                   
 608         ret   
 609 
 610         .p2align 4
 611 L(P1Q9):
 612         mov    -0x49(%rdi), %r8
 613         mov    %r8, -0x49(%rsi)
 614 L(P1Q8):
 615         mov    -0x41(%rdi), %rcx
 616         mov    %rcx, -0x41(%rsi)
 617 L(P1Q7):
 618         mov    -0x39(%rdi), %r10
 619         mov    %r10, -0x39(%rsi)
 620 L(P1Q6):
 621         mov    -0x31(%rdi), %r8
 622         mov    %r8, -0x31(%rsi)
 623 L(P1Q5):
 624         mov    -0x29(%rdi), %rcx
 625         mov    %rcx, -0x29(%rsi)
 626 L(P1Q4):
 627         mov    -0x21(%rdi), %r10
 628         mov    %r10, -0x21(%rsi)
 629 L(P1Q3):
 630         mov    -0x19(%rdi), %r8
 631         mov    %r8, -0x19(%rsi)
 632 L(P1Q2):
 633         mov    -0x11(%rdi), %rcx
 634         mov    %rcx, -0x11(%rsi)
 635 L(P1Q1):
 636         mov    -0x9(%rdi), %r10
 637         mov    %r10, -0x9(%rsi)
 638 L(P1Q0):
 639         movzbq -0x1(%rdi), %r8
 640         mov    %r8b, -0x1(%rsi)
 641         ret   
 642 
 643         .p2align 4
 644 L(P2Q9):
 645         mov    -0x4a(%rdi), %r8
 646         mov    %r8, -0x4a(%rsi)
 647 L(P2Q8):
 648         mov    -0x42(%rdi), %rcx
 649         mov    %rcx, -0x42(%rsi)
 650 L(P2Q7):
 651         mov    -0x3a(%rdi), %r10
 652         mov    %r10, -0x3a(%rsi)
 653 L(P2Q6):
 654         mov    -0x32(%rdi), %r8
 655         mov    %r8, -0x32(%rsi)
 656 L(P2Q5):
 657         mov    -0x2a(%rdi), %rcx
 658         mov    %rcx, -0x2a(%rsi)
 659 L(P2Q4):
 660         mov    -0x22(%rdi), %r10
 661         mov    %r10, -0x22(%rsi)
 662 L(P2Q3):
 663         mov    -0x1a(%rdi), %r8
 664         mov    %r8, -0x1a(%rsi)
 665 L(P2Q2):
 666         mov    -0x12(%rdi), %rcx
 667         mov    %rcx, -0x12(%rsi)
 668 L(P2Q1):
 669         mov    -0xa(%rdi), %r10
 670         mov    %r10, -0xa(%rsi)
 671 L(P2Q0):
 672         movzwq -0x2(%rdi), %r8
 673         mov    %r8w, -0x2(%rsi)
 674         ret   
 675 
 676         .p2align 4
 677 L(P3Q9):
 678         mov    -0x4b(%rdi), %r8
 679         mov    %r8, -0x4b(%rsi)
 680 L(P3Q8):
 681         mov    -0x43(%rdi), %rcx
 682         mov    %rcx, -0x43(%rsi)
 683 L(P3Q7):
 684         mov    -0x3b(%rdi), %r10
 685         mov    %r10, -0x3b(%rsi)
 686 L(P3Q6):
 687         mov    -0x33(%rdi), %r8
 688         mov    %r8, -0x33(%rsi)
 689 L(P3Q5):
 690         mov    -0x2b(%rdi), %rcx
 691         mov    %rcx, -0x2b(%rsi)
 692 L(P3Q4):
 693         mov    -0x23(%rdi), %r10
 694         mov    %r10, -0x23(%rsi)
 695 L(P3Q3):
 696         mov    -0x1b(%rdi), %r8
 697         mov    %r8, -0x1b(%rsi)
 698 L(P3Q2):
 699         mov    -0x13(%rdi), %rcx
 700         mov    %rcx, -0x13(%rsi)
 701 L(P3Q1):
 702         mov    -0xb(%rdi), %r10
 703         mov    %r10, -0xb(%rsi)
	/*
	 * These trailing loads/stores have to do all their loads first,
	 * then do the stores.
	 */
 708 L(P3Q0):
 709         movzwq -0x3(%rdi), %r8
 710         movzbq -0x1(%rdi), %r10
 711         mov    %r8w, -0x3(%rsi)
 712         mov    %r10b, -0x1(%rsi)
 713         ret   
 714 
 715         .p2align 4
 716 L(P4Q9):
 717         mov    -0x4c(%rdi), %r8
 718         mov    %r8, -0x4c(%rsi)
 719 L(P4Q8):
 720         mov    -0x44(%rdi), %rcx
 721         mov    %rcx, -0x44(%rsi)
 722 L(P4Q7):
 723         mov    -0x3c(%rdi), %r10
 724         mov    %r10, -0x3c(%rsi)
 725 L(P4Q6):
 726         mov    -0x34(%rdi), %r8
 727         mov    %r8, -0x34(%rsi)
 728 L(P4Q5):
 729         mov    -0x2c(%rdi), %rcx
 730         mov    %rcx, -0x2c(%rsi)
 731 L(P4Q4):
 732         mov    -0x24(%rdi), %r10
 733         mov    %r10, -0x24(%rsi)
 734 L(P4Q3):
 735         mov    -0x1c(%rdi), %r8
 736         mov    %r8, -0x1c(%rsi)
 737 L(P4Q2):
 738         mov    -0x14(%rdi), %rcx
 739         mov    %rcx, -0x14(%rsi)
 740 L(P4Q1):
 741         mov    -0xc(%rdi), %r10
 742         mov    %r10, -0xc(%rsi)
 743 L(P4Q0):
 744         mov    -0x4(%rdi), %r8d
 745         mov    %r8d, -0x4(%rsi)
 746         ret   
 747 
 748         .p2align 4
 749 L(P5Q9):
 750         mov    -0x4d(%rdi), %r8
 751         mov    %r8, -0x4d(%rsi)
 752 L(P5Q8):
 753         mov    -0x45(%rdi), %rcx
 754         mov    %rcx, -0x45(%rsi)
 755 L(P5Q7):
 756         mov    -0x3d(%rdi), %r10
 757         mov    %r10, -0x3d(%rsi)
 758 L(P5Q6):
 759         mov    -0x35(%rdi), %r8
 760         mov    %r8, -0x35(%rsi)
 761 L(P5Q5):
 762         mov    -0x2d(%rdi), %rcx
 763         mov    %rcx, -0x2d(%rsi)
 764 L(P5Q4):
 765         mov    -0x25(%rdi), %r10
 766         mov    %r10, -0x25(%rsi)
 767 L(P5Q3):
 768         mov    -0x1d(%rdi), %r8
 769         mov    %r8, -0x1d(%rsi)
 770 L(P5Q2):
 771         mov    -0x15(%rdi), %rcx
 772         mov    %rcx, -0x15(%rsi)
 773 L(P5Q1):
 774         mov    -0xd(%rdi), %r10
 775         mov    %r10, -0xd(%rsi)
 776 L(P5Q0):
 777         mov    -0x5(%rdi), %r8d
 778         movzbq -0x1(%rdi), %r10
 779         mov    %r8d, -0x5(%rsi)
 780         mov    %r10b, -0x1(%rsi)
 781         ret   
 782 
 783         .p2align 4
 784 L(P6Q9):
 785         mov    -0x4e(%rdi), %r8
 786         mov    %r8, -0x4e(%rsi)
 787 L(P6Q8):
 788         mov    -0x46(%rdi), %rcx
 789         mov    %rcx, -0x46(%rsi)
 790 L(P6Q7):
 791         mov    -0x3e(%rdi), %r10
 792         mov    %r10, -0x3e(%rsi)
 793 L(P6Q6):
 794         mov    -0x36(%rdi), %r8
 795         mov    %r8, -0x36(%rsi)
 796 L(P6Q5):
 797         mov    -0x2e(%rdi), %rcx
 798         mov    %rcx, -0x2e(%rsi)
 799 L(P6Q4):
 800         mov    -0x26(%rdi), %r10
 801         mov    %r10, -0x26(%rsi)
 802 L(P6Q3):
 803         mov    -0x1e(%rdi), %r8
 804         mov    %r8, -0x1e(%rsi)
 805 L(P6Q2):
 806         mov    -0x16(%rdi), %rcx
 807         mov    %rcx, -0x16(%rsi)
 808 L(P6Q1):
 809         mov    -0xe(%rdi), %r10
 810         mov    %r10, -0xe(%rsi)
 811 L(P6Q0):
 812         mov    -0x6(%rdi), %r8d
 813         movzwq -0x2(%rdi), %r10
 814         mov    %r8d, -0x6(%rsi)
 815         mov    %r10w, -0x2(%rsi)
 816         ret   
 817 
 818         .p2align 4
 819 L(P7Q9):
 820         mov    -0x4f(%rdi), %r8
 821         mov    %r8, -0x4f(%rsi)
 822 L(P7Q8):
 823         mov    -0x47(%rdi), %rcx
 824         mov    %rcx, -0x47(%rsi)
 825 L(P7Q7):
 826         mov    -0x3f(%rdi), %r10
 827         mov    %r10, -0x3f(%rsi)
 828 L(P7Q6):
 829         mov    -0x37(%rdi), %r8
 830         mov    %r8, -0x37(%rsi)
 831 L(P7Q5):
 832         mov    -0x2f(%rdi), %rcx
 833         mov    %rcx, -0x2f(%rsi)
 834 L(P7Q4):
 835         mov    -0x27(%rdi), %r10
 836         mov    %r10, -0x27(%rsi)
 837 L(P7Q3):
 838         mov    -0x1f(%rdi), %r8
 839         mov    %r8, -0x1f(%rsi)
 840 L(P7Q2):
 841         mov    -0x17(%rdi), %rcx
 842         mov    %rcx, -0x17(%rsi)
 843 L(P7Q1):
 844         mov    -0xf(%rdi), %r10
 845         mov    %r10, -0xf(%rsi)
 846 L(P7Q0):
 847         mov    -0x7(%rdi), %r8d
 848         movzwq -0x3(%rdi), %r10
 849         movzbq -0x1(%rdi), %rcx
 850         mov    %r8d, -0x7(%rsi)
 851         mov    %r10w, -0x3(%rsi)
 852         mov    %cl, -0x1(%rsi)
 853         ret   
 854 
	/*
	 * For large sizes rep smovq is fastest.
	 * The transition point was determined experimentally as measured on
	 * Intel Xeon processors (incl. Nehalem and previous generations) and
	 * AMD Opteron. The transition value is patched at boot time to avoid
	 * a memory reference hit.
	 */
 862         .globl bcopy_patch_start
 863 bcopy_patch_start:
 864         cmpq    $BCOPY_NHM_REP, %rdx
 865         .globl bcopy_patch_end
 866 bcopy_patch_end:
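
	/*
	 * The cmpq between bcopy_patch_start and bcopy_patch_end above is
	 * the boot-time patch mentioned in the previous comment: on
	 * processors where the higher threshold wins (e.g. Nehalem), startup
	 * code is expected to copy these bytes over the BCOPY_DFLT_REP
	 * compare at bcopy_ck_size below.
	 */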
 867 
 868         .p2align 4
 869         ALTENTRY(bcopy_ck_size)
 870 
 871         cmpq    $BCOPY_DFLT_REP, %rdx
 872         jae     L(use_rep)
 873 
	/*
	 * Align to an 8-byte boundary. This avoids penalties from unaligned
	 * stores as well as from stores that span cachelines.
	 */
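
	/*
	 * A hedged C sketch of this peel (illustrative only; d is the
	 * destination, s the source, n the count, and the real code does the
	 * 2- and 4-byte steps with single word/dword moves):
	 *
	 *	if ((uintptr_t)d & 1) { *d++ = *s++; n -= 1; }
	 *	if ((uintptr_t)d & 2) { *d++ = *s++; *d++ = *s++; n -= 2; }
	 *	if ((uintptr_t)d & 4) {
	 *		*d++ = *s++; *d++ = *s++;
	 *		*d++ = *s++; *d++ = *s++;
	 *		n -= 4;
	 *	}
	 */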
 878         test    $0x7, %rsi
 879         jz      L(aligned_loop)
 880         test    $0x1, %rsi
 881         jz      2f
 882         movzbq  (%rdi), %r8
 883         dec     %rdx
 884         inc     %rdi
 885         mov     %r8b, (%rsi)
 886         inc     %rsi
 887 2:
 888         test    $0x2, %rsi
 889         jz      4f
 890         movzwq  (%rdi), %r8
 891         sub     $0x2, %rdx
 892         add     $0x2, %rdi
 893         mov     %r8w, (%rsi)
 894         add     $0x2, %rsi
 895 4:
 896         test    $0x4, %rsi
 897         jz      L(aligned_loop)
 898         mov     (%rdi), %r8d
 899         sub     $0x4, %rdx
 900         add     $0x4, %rdi
 901         mov     %r8d, (%rsi)
 902         add     $0x4, %rsi
 903 
	/*
	 * Copy 64 bytes per loop iteration.
	 */
 907         .p2align 4
 908 L(aligned_loop):
 909         mov     (%rdi), %r8
 910         mov     0x8(%rdi), %r10
 911         lea     -0x40(%rdx), %rdx
 912         mov     %r8, (%rsi)
 913         mov     %r10, 0x8(%rsi)
 914         mov     0x10(%rdi), %rcx
 915         mov     0x18(%rdi), %r8
 916         mov     %rcx, 0x10(%rsi)
 917         mov     %r8, 0x18(%rsi)
 918 
 919         cmp     $0x40, %rdx
 920         mov     0x20(%rdi), %r10
 921         mov     0x28(%rdi), %rcx
 922         mov     %r10, 0x20(%rsi)
 923         mov     %rcx, 0x28(%rsi)
 924         mov     0x30(%rdi), %r8
 925         mov     0x38(%rdi), %r10
 926         lea     0x40(%rdi), %rdi
 927         mov     %r8, 0x30(%rsi)
 928         mov     %r10, 0x38(%rsi)
 929         lea     0x40(%rsi), %rsi
 930         jae     L(aligned_loop)
 931 
 932         /*
 933          * Copy remaining bytes (0-63)
 934          */
 935 L(do_remainder):
 936         leaq    L(fwdPxQx)(%rip), %r10
 937         addq    %rdx, %rdi
 938         addq    %rdx, %rsi
 939         movslq  (%r10,%rdx,4), %rcx
 940         leaq    (%rcx,%r10,1), %r10
 941         jmpq    *%r10
 942 
	/*
	 * Use rep smovq. Copy the remainder via unrolled code.
	 */
 946         .p2align 4
 947 L(use_rep):
 948         xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949         movq    %rdx, %rcx              /* %rcx = count */
 950         shrq    $3, %rcx                /* 8-byte word count */
 951         rep
 952           smovq
 953 
 954         xchgq   %rsi, %rdi              /* %rdi = src, %rsi = destination */
 955         andq    $7, %rdx                /* remainder */
 956         jnz     L(do_remainder)
 957         ret
 958 #undef  L
 959         SET_SIZE(bcopy_ck_size)
 960 
 961 #ifdef DEBUG
	/*
	 * Set up a frame on the run-time stack. The end of the input argument
	 * area must be aligned on a 16-byte boundary. The stack pointer %rsp
	 * always points to the end of the latest allocated stack frame.
	 * panic(const char *format, ...) is a varargs function. When a
	 * function taking variable arguments is called, %rax must be set
	 * to eight times the number of floating point parameters passed
	 * to the function in SSE registers.
	 */
 971 call_panic:
 972         pushq   %rbp                    /* align stack properly */
 973         movq    %rsp, %rbp
 974         xorl    %eax, %eax              /* no variable arguments */
 975         call    panic                   /* %rdi = format string */
 976 #endif
 977         SET_SIZE(bcopy_altentry)
 978         SET_SIZE(bcopy)
 979 
 980 #elif defined(__i386)
 981 
 982 #define ARG_FROM        4
 983 #define ARG_TO          8
 984 #define ARG_COUNT       12
 985 
 986         ENTRY(bcopy)
 987 #ifdef DEBUG
 988         movl    ARG_COUNT(%esp), %eax
 989         orl     %eax, %eax
 990         jz      1f
 991         movl    postbootkernelbase, %eax
 992         cmpl    %eax, ARG_FROM(%esp)
 993         jb      0f
 994         cmpl    %eax, ARG_TO(%esp)
 995         jnb     1f
 996 0:      pushl   %ebp
 997         movl    %esp, %ebp
 998         pushl   $.bcopy_panic_msg
 999         call    panic
1000 1:
1001 #endif
1002 do_copy:
1003         movl    %esi, %eax              /* save registers */
1004         movl    %edi, %edx
1005         movl    ARG_COUNT(%esp), %ecx
1006         movl    ARG_FROM(%esp), %esi
1007         movl    ARG_TO(%esp), %edi
1008 
1009         shrl    $2, %ecx                /* word count */
1010         rep
1011           smovl
1012         movl    ARG_COUNT(%esp), %ecx
1013         andl    $3, %ecx                /* bytes left over */
1014         rep
1015           smovb
1016         movl    %eax, %esi              /* restore registers */
1017         movl    %edx, %edi
1018         ret
1019         SET_SIZE(bcopy)
1020 
1021 #undef  ARG_COUNT
1022 #undef  ARG_FROM
1023 #undef  ARG_TO
1024 
1025 #endif  /* __i386 */
1026 #endif  /* __lint */
1027 
1028 
/*
 * Zero a block of storage, returning an error code if we
 * take a kernel pagefault which cannot be resolved.
 * Returns an errno value on a pagefault error, 0 if all is ok.
 */
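
/*
 * A hedged caller-level approximation of what kzero() provides, using the
 * documented on_fault(9F)/no_fault(9F) interfaces (kzero itself installs its
 * own lofault label directly, as the assembly below shows, and returns the
 * errno computed by the trap handler):
 *
 *	label_t ljb;
 *
 *	if (on_fault(&ljb)) {
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	bzero(addr, count);
 *	no_fault();
 *	return (0);
 */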
1034 
1035 #if defined(__lint)
1036 
1037 /* ARGSUSED */
1038 int
1039 kzero(void *addr, size_t count)
1040 { return (0); }
1041 
1042 #else   /* __lint */
1043 
1044 #if defined(__amd64)
1045 
1046         ENTRY(kzero)
1047 #ifdef DEBUG
1048         cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1049         jnb     0f
1050         leaq    .kzero_panic_msg(%rip), %rdi
1051         jmp     call_panic              /* setup stack and call panic */
1052 0:
1053 #endif
1054         /*
1055          * pass lofault value as 3rd argument for fault return 
1056          */
1057         leaq    _kzeroerr(%rip), %rdx
1058 
1059         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
1060         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
1061         movq    %rdx, T_LOFAULT(%r9)    /* new lofault */
1062         call    bzero_altentry
1063         xorl    %eax, %eax
1064         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1065         ret
1066         /*
1067          * A fault during bzero is indicated through an errno value
1068          * in %rax when we iretq to here.
1069          */
1070 _kzeroerr:
1071         addq    $8, %rsp                /* pop bzero_altentry call ret addr */
1072         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
1073         ret
1074         SET_SIZE(kzero)
1075 
1076 #elif defined(__i386)
1077 
1078 #define ARG_ADDR        8
1079 #define ARG_COUNT       12
1080 
1081         ENTRY(kzero)
1082 #ifdef DEBUG
1083         pushl   %ebp
1084         movl    %esp, %ebp
1085         movl    postbootkernelbase, %eax
1086         cmpl    %eax, ARG_ADDR(%ebp)
1087         jnb     0f
1088         pushl   $.kzero_panic_msg
1089         call    panic
1090 0:      popl    %ebp
1091 #endif
1092         lea     _kzeroerr, %eax         /* kzeroerr is lofault value */
1093 
1094         pushl   %ebp                    /* save stack base */
1095         movl    %esp, %ebp              /* set new stack base */
1096         pushl   %edi                    /* save %edi */
1097 
1098         mov     %gs:CPU_THREAD, %edx    
1099         movl    T_LOFAULT(%edx), %edi
1100         pushl   %edi                    /* save the current lofault */
1101         movl    %eax, T_LOFAULT(%edx)   /* new lofault */
1102 
1103         movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1104         movl    ARG_ADDR(%ebp), %edi    /* %edi <- address of bytes to clear */
1105         shrl    $2, %ecx                /* Count of double words to zero */
1106         xorl    %eax, %eax              /* sstol val */
1107         rep
1108           sstol                 /* %ecx contains words to clear (%eax=0) */
1109 
1110         movl    ARG_COUNT(%ebp), %ecx   /* get size in bytes */
1111         andl    $3, %ecx                /* do mod 4 */
1112         rep
1113           sstob                 /* %ecx contains residual bytes to clear */
1114 
1115         /*
1116          * A fault during kzero is indicated through an errno value
1117          * in %eax when we iret to here.
1118          */
1119 _kzeroerr:
1120         popl    %edi
1121         movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
1122         popl    %edi
1123         popl    %ebp
1124         ret
1125         SET_SIZE(kzero)
1126 
1127 #undef  ARG_ADDR
1128 #undef  ARG_COUNT
1129 
1130 #endif  /* __i386 */
1131 #endif  /* __lint */
1132 
1133 /*
1134  * Zero a block of storage.
1135  */
1136 
1137 #if defined(__lint)
1138 
1139 /* ARGSUSED */
1140 void
1141 bzero(void *addr, size_t count)
1142 {}
1143 
1144 #else   /* __lint */
1145 
1146 #if defined(__amd64)
1147 
1148         ENTRY(bzero)
1149 #ifdef DEBUG
1150         cmpq    postbootkernelbase(%rip), %rdi  /* %rdi = addr */
1151         jnb     0f
1152         leaq    .bzero_panic_msg(%rip), %rdi
1153         jmp     call_panic              /* setup stack and call panic */
1154 0:
1155 #endif
1156         ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159         xorl    %eax, %eax
1160 
1161         cmpq    $0x50, %rsi             /* 80 */
1162         jae     L(ck_align)
1163 
	/*
	 * Performance data shows many callers zero small buffers. So for the
	 * best performance on these sizes, unrolled code is used. Store zeros
	 * without worrying about alignment.
	 */
1169         leaq    L(setPxQx)(%rip), %r10
1170         addq    %rsi, %rdi
1171         movslq  (%r10,%rsi,4), %rcx
1172         leaq    (%rcx,%r10,1), %r10
1173         jmpq    *%r10
1174 
1175         .p2align 4
1176 L(setPxQx):
1177         .int       L(P0Q0)-L(setPxQx)   /* 0 */
1178         .int       L(P1Q0)-L(setPxQx)
1179         .int       L(P2Q0)-L(setPxQx)
1180         .int       L(P3Q0)-L(setPxQx)
1181         .int       L(P4Q0)-L(setPxQx)
1182         .int       L(P5Q0)-L(setPxQx)
1183         .int       L(P6Q0)-L(setPxQx)
1184         .int       L(P7Q0)-L(setPxQx) 
1185 
1186         .int       L(P0Q1)-L(setPxQx)   /* 8 */
1187         .int       L(P1Q1)-L(setPxQx)
1188         .int       L(P2Q1)-L(setPxQx)
1189         .int       L(P3Q1)-L(setPxQx)
1190         .int       L(P4Q1)-L(setPxQx)
1191         .int       L(P5Q1)-L(setPxQx)
1192         .int       L(P6Q1)-L(setPxQx)
1193         .int       L(P7Q1)-L(setPxQx) 
1194 
1195         .int       L(P0Q2)-L(setPxQx)   /* 16 */
1196         .int       L(P1Q2)-L(setPxQx)
1197         .int       L(P2Q2)-L(setPxQx)
1198         .int       L(P3Q2)-L(setPxQx)
1199         .int       L(P4Q2)-L(setPxQx)
1200         .int       L(P5Q2)-L(setPxQx)
1201         .int       L(P6Q2)-L(setPxQx)
1202         .int       L(P7Q2)-L(setPxQx) 
1203 
1204         .int       L(P0Q3)-L(setPxQx)   /* 24 */
1205         .int       L(P1Q3)-L(setPxQx)
1206         .int       L(P2Q3)-L(setPxQx)
1207         .int       L(P3Q3)-L(setPxQx)
1208         .int       L(P4Q3)-L(setPxQx)
1209         .int       L(P5Q3)-L(setPxQx)
1210         .int       L(P6Q3)-L(setPxQx)
1211         .int       L(P7Q3)-L(setPxQx) 
1212 
1213         .int       L(P0Q4)-L(setPxQx)   /* 32 */
1214         .int       L(P1Q4)-L(setPxQx)
1215         .int       L(P2Q4)-L(setPxQx)
1216         .int       L(P3Q4)-L(setPxQx)
1217         .int       L(P4Q4)-L(setPxQx)
1218         .int       L(P5Q4)-L(setPxQx)
1219         .int       L(P6Q4)-L(setPxQx)
1220         .int       L(P7Q4)-L(setPxQx) 
1221 
1222         .int       L(P0Q5)-L(setPxQx)   /* 40 */
1223         .int       L(P1Q5)-L(setPxQx)
1224         .int       L(P2Q5)-L(setPxQx)
1225         .int       L(P3Q5)-L(setPxQx)
1226         .int       L(P4Q5)-L(setPxQx)
1227         .int       L(P5Q5)-L(setPxQx)
1228         .int       L(P6Q5)-L(setPxQx)
1229         .int       L(P7Q5)-L(setPxQx) 
1230 
1231         .int       L(P0Q6)-L(setPxQx)   /* 48 */
1232         .int       L(P1Q6)-L(setPxQx)
1233         .int       L(P2Q6)-L(setPxQx)
1234         .int       L(P3Q6)-L(setPxQx)
1235         .int       L(P4Q6)-L(setPxQx)
1236         .int       L(P5Q6)-L(setPxQx)
1237         .int       L(P6Q6)-L(setPxQx)
1238         .int       L(P7Q6)-L(setPxQx) 
1239 
1240         .int       L(P0Q7)-L(setPxQx)   /* 56 */
1241         .int       L(P1Q7)-L(setPxQx)
1242         .int       L(P2Q7)-L(setPxQx)
1243         .int       L(P3Q7)-L(setPxQx)
1244         .int       L(P4Q7)-L(setPxQx)
1245         .int       L(P5Q7)-L(setPxQx)
1246         .int       L(P6Q7)-L(setPxQx)
1247         .int       L(P7Q7)-L(setPxQx) 
1248 
1249         .int       L(P0Q8)-L(setPxQx)   /* 64 */
1250         .int       L(P1Q8)-L(setPxQx)
1251         .int       L(P2Q8)-L(setPxQx)
1252         .int       L(P3Q8)-L(setPxQx)
1253         .int       L(P4Q8)-L(setPxQx)
1254         .int       L(P5Q8)-L(setPxQx)
1255         .int       L(P6Q8)-L(setPxQx)
1256         .int       L(P7Q8)-L(setPxQx)
1257 
1258         .int       L(P0Q9)-L(setPxQx)   /* 72 */
1259         .int       L(P1Q9)-L(setPxQx)
1260         .int       L(P2Q9)-L(setPxQx)
1261         .int       L(P3Q9)-L(setPxQx)
1262         .int       L(P4Q9)-L(setPxQx)
1263         .int       L(P5Q9)-L(setPxQx)
1264         .int       L(P6Q9)-L(setPxQx)
1265         .int       L(P7Q9)-L(setPxQx)   /* 79 */
1266 
1267         .p2align 4
1268 L(P0Q9): mov    %rax, -0x48(%rdi)
1269 L(P0Q8): mov    %rax, -0x40(%rdi)
1270 L(P0Q7): mov    %rax, -0x38(%rdi)
1271 L(P0Q6): mov    %rax, -0x30(%rdi)
1272 L(P0Q5): mov    %rax, -0x28(%rdi)
1273 L(P0Q4): mov    %rax, -0x20(%rdi)
1274 L(P0Q3): mov    %rax, -0x18(%rdi)
1275 L(P0Q2): mov    %rax, -0x10(%rdi)
1276 L(P0Q1): mov    %rax, -0x8(%rdi)
1277 L(P0Q0): 
1278          ret
1279 
1280         .p2align 4
1281 L(P1Q9): mov    %rax, -0x49(%rdi)
1282 L(P1Q8): mov    %rax, -0x41(%rdi)
1283 L(P1Q7): mov    %rax, -0x39(%rdi)
1284 L(P1Q6): mov    %rax, -0x31(%rdi)
1285 L(P1Q5): mov    %rax, -0x29(%rdi)
1286 L(P1Q4): mov    %rax, -0x21(%rdi)
1287 L(P1Q3): mov    %rax, -0x19(%rdi)
1288 L(P1Q2): mov    %rax, -0x11(%rdi)
1289 L(P1Q1): mov    %rax, -0x9(%rdi)
1290 L(P1Q0): mov    %al, -0x1(%rdi)
1291          ret
1292 
1293         .p2align 4
1294 L(P2Q9): mov    %rax, -0x4a(%rdi)
1295 L(P2Q8): mov    %rax, -0x42(%rdi)
1296 L(P2Q7): mov    %rax, -0x3a(%rdi)
1297 L(P2Q6): mov    %rax, -0x32(%rdi)
1298 L(P2Q5): mov    %rax, -0x2a(%rdi)
1299 L(P2Q4): mov    %rax, -0x22(%rdi)
1300 L(P2Q3): mov    %rax, -0x1a(%rdi)
1301 L(P2Q2): mov    %rax, -0x12(%rdi)
1302 L(P2Q1): mov    %rax, -0xa(%rdi)
1303 L(P2Q0): mov    %ax, -0x2(%rdi)
1304          ret
1305 
1306         .p2align 4
1307 L(P3Q9): mov    %rax, -0x4b(%rdi)
1308 L(P3Q8): mov    %rax, -0x43(%rdi)
1309 L(P3Q7): mov    %rax, -0x3b(%rdi)
1310 L(P3Q6): mov    %rax, -0x33(%rdi)
1311 L(P3Q5): mov    %rax, -0x2b(%rdi)
1312 L(P3Q4): mov    %rax, -0x23(%rdi)
1313 L(P3Q3): mov    %rax, -0x1b(%rdi)
1314 L(P3Q2): mov    %rax, -0x13(%rdi)
1315 L(P3Q1): mov    %rax, -0xb(%rdi)
1316 L(P3Q0): mov    %ax, -0x3(%rdi)
1317          mov    %al, -0x1(%rdi)
1318          ret
1319 
1320         .p2align 4
1321 L(P4Q9): mov    %rax, -0x4c(%rdi)
1322 L(P4Q8): mov    %rax, -0x44(%rdi)
1323 L(P4Q7): mov    %rax, -0x3c(%rdi)
1324 L(P4Q6): mov    %rax, -0x34(%rdi)
1325 L(P4Q5): mov    %rax, -0x2c(%rdi)
1326 L(P4Q4): mov    %rax, -0x24(%rdi)
1327 L(P4Q3): mov    %rax, -0x1c(%rdi)
1328 L(P4Q2): mov    %rax, -0x14(%rdi)
1329 L(P4Q1): mov    %rax, -0xc(%rdi)
1330 L(P4Q0): mov    %eax, -0x4(%rdi)
1331          ret
1332 
1333         .p2align 4
1334 L(P5Q9): mov    %rax, -0x4d(%rdi)
1335 L(P5Q8): mov    %rax, -0x45(%rdi)
1336 L(P5Q7): mov    %rax, -0x3d(%rdi)
1337 L(P5Q6): mov    %rax, -0x35(%rdi)
1338 L(P5Q5): mov    %rax, -0x2d(%rdi)
1339 L(P5Q4): mov    %rax, -0x25(%rdi)
1340 L(P5Q3): mov    %rax, -0x1d(%rdi)
1341 L(P5Q2): mov    %rax, -0x15(%rdi)
1342 L(P5Q1): mov    %rax, -0xd(%rdi)
1343 L(P5Q0): mov    %eax, -0x5(%rdi)
1344          mov    %al, -0x1(%rdi)
1345          ret
1346 
1347         .p2align 4
1348 L(P6Q9): mov    %rax, -0x4e(%rdi)
1349 L(P6Q8): mov    %rax, -0x46(%rdi)
1350 L(P6Q7): mov    %rax, -0x3e(%rdi)
1351 L(P6Q6): mov    %rax, -0x36(%rdi)
1352 L(P6Q5): mov    %rax, -0x2e(%rdi)
1353 L(P6Q4): mov    %rax, -0x26(%rdi)
1354 L(P6Q3): mov    %rax, -0x1e(%rdi)
1355 L(P6Q2): mov    %rax, -0x16(%rdi)
1356 L(P6Q1): mov    %rax, -0xe(%rdi)
1357 L(P6Q0): mov    %eax, -0x6(%rdi)
1358          mov    %ax, -0x2(%rdi)
1359          ret
1360 
1361         .p2align 4
1362 L(P7Q9): mov    %rax, -0x4f(%rdi)
1363 L(P7Q8): mov    %rax, -0x47(%rdi)
1364 L(P7Q7): mov    %rax, -0x3f(%rdi)
1365 L(P7Q6): mov    %rax, -0x37(%rdi)
1366 L(P7Q5): mov    %rax, -0x2f(%rdi)
1367 L(P7Q4): mov    %rax, -0x27(%rdi)
1368 L(P7Q3): mov    %rax, -0x1f(%rdi)
1369 L(P7Q2): mov    %rax, -0x17(%rdi)
1370 L(P7Q1): mov    %rax, -0xf(%rdi)
1371 L(P7Q0): mov    %eax, -0x7(%rdi)
1372          mov    %ax, -0x3(%rdi)
1373          mov    %al, -0x1(%rdi)
1374          ret
1375 
	/*
	 * Align to a 16-byte boundary. This avoids penalties from unaligned
	 * stores as well as from stores that span cachelines. Note that
	 * 16-byte alignment is better in the case where rep sstoq is used.
	 */
1381         .p2align 4
1382 L(ck_align):
1383         test    $0xf, %rdi
1384         jz      L(aligned_now)
1385         test    $1, %rdi
1386         jz      2f
1387         mov     %al, (%rdi)
1388         dec     %rsi
1389         lea     1(%rdi),%rdi
1390 2:
1391         test    $2, %rdi
1392         jz      4f
1393         mov     %ax, (%rdi)
1394         sub     $2, %rsi
1395         lea     2(%rdi),%rdi
1396 4:
1397         test    $4, %rdi
1398         jz      8f
1399         mov     %eax, (%rdi)
1400         sub     $4, %rsi
1401         lea     4(%rdi),%rdi
1402 8:
1403         test    $8, %rdi
1404         jz      L(aligned_now)
1405         mov     %rax, (%rdi)
1406         sub     $8, %rsi
1407         lea     8(%rdi),%rdi
1408 
1409         /*
1410          * For large sizes rep sstoq is fastest.
1411          * Transition point determined experimentally as measured on
1412          * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
1413          */
1414 L(aligned_now):
1415         cmp     $BZERO_USE_REP, %rsi
1416         ja      L(use_rep)
1417 
	/*
	 * Zero 64 bytes per loop iteration.
	 */
1421         .p2align 4
1422 L(bzero_loop):
1423         leaq    -0x40(%rsi), %rsi
1424         cmpq    $0x40, %rsi
1425         movq    %rax, (%rdi) 
1426         movq    %rax, 0x8(%rdi) 
1427         movq    %rax, 0x10(%rdi) 
1428         movq    %rax, 0x18(%rdi) 
1429         movq    %rax, 0x20(%rdi) 
1430         movq    %rax, 0x28(%rdi) 
1431         movq    %rax, 0x30(%rdi) 
1432         movq    %rax, 0x38(%rdi) 
1433         leaq    0x40(%rdi), %rdi
1434         jae     L(bzero_loop)
1435 
	/*
	 * Clear any remaining bytes.
	 */
1439 9:
1440         leaq    L(setPxQx)(%rip), %r10
1441         addq    %rsi, %rdi
1442         movslq  (%r10,%rsi,4), %rcx
1443         leaq    (%rcx,%r10,1), %r10
1444         jmpq    *%r10
1445 
1446         /*
1447          * Use rep sstoq. Clear any remainder via unrolled code
1448          */
1449         .p2align 4
1450 L(use_rep):
1451         movq    %rsi, %rcx              /* get size in bytes */
1452         shrq    $3, %rcx                /* count of 8-byte words to zero */
1453         rep
1454           sstoq                         /* %rcx = words to clear (%rax=0) */
1455         andq    $7, %rsi                /* remaining bytes */
1456         jnz     9b
1457         ret
1458 #undef  L
1459         SET_SIZE(bzero_altentry)
1460         SET_SIZE(bzero)
1461 
1462 #elif defined(__i386)
1463 
1464 #define ARG_ADDR        4
1465 #define ARG_COUNT       8
1466 
1467         ENTRY(bzero)
1468 #ifdef DEBUG
1469         movl    postbootkernelbase, %eax
1470         cmpl    %eax, ARG_ADDR(%esp)
1471         jnb     0f
1472         pushl   %ebp
1473         movl    %esp, %ebp
1474         pushl   $.bzero_panic_msg
1475         call    panic
1476 0:
1477 #endif
1478 do_zero:
1479         movl    %edi, %edx
1480         movl    ARG_COUNT(%esp), %ecx
1481         movl    ARG_ADDR(%esp), %edi
1482         shrl    $2, %ecx
1483         xorl    %eax, %eax
1484         rep
1485           sstol
1486         movl    ARG_COUNT(%esp), %ecx
1487         andl    $3, %ecx
1488         rep
1489           sstob
1490         movl    %edx, %edi
1491         ret
1492         SET_SIZE(bzero)
1493 
1494 #undef  ARG_ADDR
1495 #undef  ARG_COUNT
1496 
1497 #endif  /* __i386 */
1498 #endif  /* __lint */
1499 
/*
 * Transfer data to and from user space.
 * Note that these routines can cause faults.
 * It is assumed that the kernel has nothing mapped
 * below KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there are two extremely similar routines - xcopyin_nta() and
 * xcopyout_nta() - which return the errno that we've faithfully computed.
 * This allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 */
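
/*
 * A hedged illustration of the two conventions described above (error
 * handling simplified; copy_cached == 0 lets xcopyin_nta use the
 * non-temporal path when size and alignment permit):
 *
 *	DDI/DKI flavour, -1 on any failure:
 *
 *		if (copyin(uaddr, kbuf, len) != 0)
 *			return (EFAULT);
 *
 *	errno flavour, as used by uiomove(9F) and friends:
 *
 *		int err = xcopyin_nta(uaddr, kbuf, len, 0);
 *		if (err != 0)
 *			return (err);
 */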
1517 
1518 /*
1519  * Copy user data to kernel space.
1520  */
1521 
1522 #if defined(__lint)
1523 
1524 /* ARGSUSED */
1525 int
1526 copyin(const void *uaddr, void *kaddr, size_t count)
1527 { return (0); }
1528 
1529 #else   /* lint */
1530 
1531 #if defined(__amd64)
1532 
1533         ENTRY(copyin)
1534         pushq   %rbp
1535         movq    %rsp, %rbp
1536         subq    $24, %rsp
1537 
1538         /*
1539          * save args in case we trap and need to rerun as a copyop
1540          */
1541         movq    %rdi, (%rsp)
1542         movq    %rsi, 0x8(%rsp)
1543         movq    %rdx, 0x10(%rsp)
1544 
1545         movq    kernelbase(%rip), %rax
1546 #ifdef DEBUG
1547         cmpq    %rax, %rsi              /* %rsi = kaddr */
1548         jnb     1f
1549         leaq    .copyin_panic_msg(%rip), %rdi
1550         xorl    %eax, %eax
1551         call    panic
1552 1:
1553 #endif
1554         /*
1555          * pass lofault value as 4th argument to do_copy_fault
1556          */
1557         leaq    _copyin_err(%rip), %rcx
1558 
1559         movq    %gs:CPU_THREAD, %r9
1560         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
	jae	3f			/* take copyop if uaddr >= kernelbase */
1562         SMAP_DISABLE_INSTR(0)
1563         jmp     do_copy_fault           /* Takes care of leave for us */
1564 
1565 _copyin_err:
1566         SMAP_ENABLE_INSTR(2)
1567         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */  
1568         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1569 3:
1570         movq    T_COPYOPS(%r9), %rax
1571         cmpq    $0, %rax
1572         jz      2f
1573         /*
1574          * reload args for the copyop
1575          */
1576         movq    (%rsp), %rdi
1577         movq    0x8(%rsp), %rsi
1578         movq    0x10(%rsp), %rdx
1579         leave
1580         jmp     *CP_COPYIN(%rax)
1581 
1582 2:      movl    $-1, %eax       
1583         leave
1584         ret
1585         SET_SIZE(copyin)
1586 
1587 #elif defined(__i386)
1588 
1589 #define ARG_UADDR       4
1590 #define ARG_KADDR       8
1591 
1592         ENTRY(copyin)
1593         movl    kernelbase, %ecx
1594 #ifdef DEBUG
1595         cmpl    %ecx, ARG_KADDR(%esp)
1596         jnb     1f
1597         pushl   %ebp
1598         movl    %esp, %ebp
1599         pushl   $.copyin_panic_msg
1600         call    panic
1601 1:
1602 #endif
1603         lea     _copyin_err, %eax
1604 
1605         movl    %gs:CPU_THREAD, %edx
1606         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1607         jb      do_copy_fault
1608         jmp     3f
1609 
1610 _copyin_err:
1611         popl    %ecx
1612         popl    %edi
1613         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1614         popl    %esi
1615         popl    %ebp
1616 3:
1617         movl    T_COPYOPS(%edx), %eax
1618         cmpl    $0, %eax
1619         jz      2f
1620         jmp     *CP_COPYIN(%eax)
1621 
1622 2:      movl    $-1, %eax
1623         ret
1624         SET_SIZE(copyin)
1625 
1626 #undef  ARG_UADDR
1627 #undef  ARG_KADDR
1628 
1629 #endif  /* __i386 */
1630 #endif  /* __lint */
1631 
1632 #if defined(__lint)
1633 
1634 /* ARGSUSED */
1635 int
1636 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
1637 { return (0); }
1638 
1639 #else   /* __lint */
1640 
1641 #if defined(__amd64)
1642 
1643         ENTRY(xcopyin_nta)
1644         pushq   %rbp
1645         movq    %rsp, %rbp
1646         subq    $24, %rsp
1647 
1648         /*
1649          * save args in case we trap and need to rerun as a copyop
1650          * %rcx is consumed in this routine so we don't need to save
1651          * it.
1652          */
1653         movq    %rdi, (%rsp)
1654         movq    %rsi, 0x8(%rsp)
1655         movq    %rdx, 0x10(%rsp)
1656 
1657         movq    kernelbase(%rip), %rax
1658 #ifdef DEBUG
1659         cmpq    %rax, %rsi              /* %rsi = kaddr */
1660         jnb     1f
1661         leaq    .xcopyin_panic_msg(%rip), %rdi
1662         xorl    %eax, %eax
1663         call    panic
1664 1:
1665 #endif
1666         movq    %gs:CPU_THREAD, %r9
1667         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1668         jae     4f
1669         cmpq    $0, %rcx                /* No non-temporal access? */
1670         /*
1671          * pass lofault value as 4th argument to do_copy_fault
1672          */
1673         leaq    _xcopyin_err(%rip), %rcx        /* doesn't set rflags */
1674         jnz     6f                      /* use regular access */
1675         /*
1676          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1677          */
1678         cmpq    $XCOPY_MIN_SIZE, %rdx
1679         jae     5f
1680 6:
1681         SMAP_DISABLE_INSTR(1)
1682         jmp     do_copy_fault
1683         
1684         /*
1685          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1686          * count is COUNT_ALIGN_SIZE aligned.
1687          */
1688 5:
1689         movq    %rdi, %r10
1690         orq     %rsi, %r10
1691         andq    $NTA_ALIGN_MASK, %r10
1692         orq     %rdx, %r10
1693         andq    $COUNT_ALIGN_MASK, %r10
1694         jnz     6b      
1695         leaq    _xcopyin_nta_err(%rip), %rcx    /* doesn't set rflags */
1696         SMAP_DISABLE_INSTR(2)
1697         jmp     do_copy_fault_nta       /* use non-temporal access */
1698         
1699 4:
1700         movl    $EFAULT, %eax
1701         jmp     3f
1702 
1703         /*
1704          * A fault during do_copy_fault or do_copy_fault_nta is
1705          * indicated through an errno value in %rax and we iret from the
1706          * trap handler to here.
1707          */
1708 _xcopyin_err:
1709         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1710 _xcopyin_nta_err:
1711         SMAP_ENABLE_INSTR(3)
1712         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1713 3:
1714         movq    T_COPYOPS(%r9), %r8
1715         cmpq    $0, %r8
1716         jz      2f
1717 
1718         /*
1719          * reload args for the copyop
1720          */
1721         movq    (%rsp), %rdi
1722         movq    0x8(%rsp), %rsi
1723         movq    0x10(%rsp), %rdx
1724         leave
1725         jmp     *CP_XCOPYIN(%r8)
1726 
1727 2:      leave
1728         ret
1729         SET_SIZE(xcopyin_nta)
1730 
1731 #elif defined(__i386)
1732 
1733 #define ARG_UADDR       4
1734 #define ARG_KADDR       8
1735 #define ARG_COUNT       12
1736 #define ARG_CACHED      16
1737 
1738         .globl  use_sse_copy
1739 
1740         ENTRY(xcopyin_nta)
1741         movl    kernelbase, %ecx
1742         lea     _xcopyin_err, %eax
1743         movl    %gs:CPU_THREAD, %edx
1744         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1745         jae     4f
1746 
1747         cmpl    $0, use_sse_copy        /* no sse support */
1748         jz      do_copy_fault
1749 
1750         cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
1751         jnz     do_copy_fault
1752 
1753         /*
1754          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1755          */
1756         cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
1757         jb      do_copy_fault
1758         
1759         /*
1760          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1761          * count is COUNT_ALIGN_SIZE aligned.
1762          */
1763         movl    ARG_UADDR(%esp), %ecx
1764         orl     ARG_KADDR(%esp), %ecx
1765         andl    $NTA_ALIGN_MASK, %ecx
1766         orl     ARG_COUNT(%esp), %ecx
1767         andl    $COUNT_ALIGN_MASK, %ecx
1768         jnz     do_copy_fault
1769 
1770         jmp     do_copy_fault_nta       /* use non-temporal access */
1771 
1772 4:
1773         movl    $EFAULT, %eax
1774         jmp     3f
1775 
1776         /*
1777          * A fault during do_copy_fault or do_copy_fault_nta is
1778          * indicated through an errno value in %eax and we iret from the
1779          * trap handler to here.
1780          */
1781 _xcopyin_err:
1782         popl    %ecx
1783         popl    %edi
1784         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1785         popl    %esi
1786         popl    %ebp
1787 3:
1788         cmpl    $0, T_COPYOPS(%edx)
1789         jz      2f
1790         movl    T_COPYOPS(%edx), %eax
1791         jmp     *CP_XCOPYIN(%eax)
1792 
1793 2:      rep;    ret     /* use 2 byte return instruction when branch target */
1794                         /* AMD Software Optimization Guide - Section 6.2 */
1795         SET_SIZE(xcopyin_nta)
1796 
1797 #undef  ARG_UADDR
1798 #undef  ARG_KADDR
1799 #undef  ARG_COUNT
1800 #undef  ARG_CACHED
1801 
1802 #endif  /* __i386 */
1803 #endif  /* __lint */
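/*
 * Usage sketch (not part of this file): xcopyin_nta() only takes the
 * non-temporal path when the caller passes copy_cached == 0, the count is at
 * least XCOPY_MIN_SIZE, source and destination are NTA_ALIGN_SIZE (4-byte)
 * aligned, and the count is COUNT_ALIGN_SIZE (16-byte) aligned; anything else
 * falls back to the ordinary copy path.  The caller below is hypothetical;
 * the return contract (0 on success, an errno such as EFAULT on failure) is
 * from the routines above.
 *
 *	static int
 *	pull_bulk_data(const void *ubuf, void *kbuf, size_t len)
 *	{
 *		(copy_cached == 0 hints that the data will not be re-read
 *		 soon, so bypassing the caches is worthwhile for big buffers)
 *		return (xcopyin_nta(ubuf, kbuf, len, 0));
 *	}
 */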
1804 
1805 /*
1806  * Copy kernel data to user space.
1807  */
1808 
1809 #if defined(__lint)
1810 
1811 /* ARGSUSED */
1812 int
1813 copyout(const void *kaddr, void *uaddr, size_t count)
1814 { return (0); }
1815 
1816 #else   /* __lint */
1817 
1818 #if defined(__amd64)
1819 
1820         ENTRY(copyout)
1821         pushq   %rbp
1822         movq    %rsp, %rbp
1823         subq    $24, %rsp
1824 
1825         /*
1826          * save args in case we trap and need to rerun as a copyop
1827          */
1828         movq    %rdi, (%rsp)
1829         movq    %rsi, 0x8(%rsp)
1830         movq    %rdx, 0x10(%rsp)
1831 
1832         movq    kernelbase(%rip), %rax
1833 #ifdef DEBUG
1834         cmpq    %rax, %rdi              /* %rdi = kaddr */
1835         jnb     1f
1836         leaq    .copyout_panic_msg(%rip), %rdi
1837         xorl    %eax, %eax
1838         call    panic
1839 1:
1840 #endif
1841         /*
1842          * pass lofault value as 4th argument to do_copy_fault
1843          */
1844         leaq    _copyout_err(%rip), %rcx
1845 
1846         movq    %gs:CPU_THREAD, %r9
1847         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1848         jae     3f                      /* take copyop if uaddr >= kernelbase */
1849         SMAP_DISABLE_INSTR(3)
1850         jmp     do_copy_fault           /* Calls leave for us */
1851 
1852 _copyout_err:
1853         SMAP_ENABLE_INSTR(4)
1854         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1855         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1856 3:
1857         movq    T_COPYOPS(%r9), %rax
1858         cmpq    $0, %rax
1859         jz      2f
1860 
1861         /*
1862          * reload args for the copyop
1863          */
1864         movq    (%rsp), %rdi
1865         movq    0x8(%rsp), %rsi
1866         movq    0x10(%rsp), %rdx
1867         leave
1868         jmp     *CP_COPYOUT(%rax)
1869 
1870 2:      movl    $-1, %eax
1871         leave
1872         ret
1873         SET_SIZE(copyout)
1874 
1875 #elif defined(__i386)
1876 
1877 #define ARG_KADDR       4
1878 #define ARG_UADDR       8
1879 
1880         ENTRY(copyout)
1881         movl    kernelbase, %ecx
1882 #ifdef DEBUG
1883         cmpl    %ecx, ARG_KADDR(%esp)
1884         jnb     1f
1885         pushl   %ebp
1886         movl    %esp, %ebp
1887         pushl   $.copyout_panic_msg
1888         call    panic
1889 1:
1890 #endif
1891         lea     _copyout_err, %eax
1892         movl    %gs:CPU_THREAD, %edx
1893         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1894         jb      do_copy_fault
1895         jmp     3f
1896         
1897 _copyout_err:
1898         popl    %ecx
1899         popl    %edi
1900         movl    %ecx, T_LOFAULT(%edx)   /* restore original lofault */
1901         popl    %esi
1902         popl    %ebp
1903 3:
1904         movl    T_COPYOPS(%edx), %eax
1905         cmpl    $0, %eax
1906         jz      2f
1907         jmp     *CP_COPYOUT(%eax)
1908 
1909 2:      movl    $-1, %eax
1910         ret
1911         SET_SIZE(copyout)
1912 
1913 #undef  ARG_UADDR
1914 #undef  ARG_KADDR
1915 
1916 #endif  /* __i386 */
1917 #endif  /* __lint */
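/*
 * Usage sketch (not part of this file): the mirror image of the copyin()
 * example -- pushing a kernel structure out to a user-supplied address with
 * copyout().  The reply block and wrapper are hypothetical; copyout() returns
 * 0 on success and -1 on a fault or a destination at or above kernelbase, as
 * implemented above.
 *
 *	typedef struct my_status {		(hypothetical reply block)
 *		uint32_t	ms_state;
 *		uint32_t	ms_errors;
 *	} my_status_t;
 *
 *	static int
 *	my_return_status(const my_status_t *kstat, intptr_t uarg)
 *	{
 *		if (copyout(kstat, (void *)uarg, sizeof (*kstat)) != 0)
 *			return (EFAULT);
 *		return (0);
 *	}
 */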
1918 
1919 #if defined(__lint)
1920 
1921 /* ARGSUSED */
1922 int
1923 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
1924 { return (0); }
1925 
1926 #else   /* __lint */
1927 
1928 #if defined(__amd64)
1929 
1930         ENTRY(xcopyout_nta)
1931         pushq   %rbp
1932         movq    %rsp, %rbp
1933         subq    $24, %rsp
1934 
1935         /*
1936          * save args in case we trap and need to rerun as a copyop
1937          */
1938         movq    %rdi, (%rsp)
1939         movq    %rsi, 0x8(%rsp)
1940         movq    %rdx, 0x10(%rsp)
1941 
1942         movq    kernelbase(%rip), %rax
1943 #ifdef DEBUG
1944         cmpq    %rax, %rdi              /* %rdi = kaddr */
1945         jnb     1f
1946         leaq    .xcopyout_panic_msg(%rip), %rdi
1947         xorl    %eax, %eax
1948         call    panic
1949 1:
1950 #endif
1951         movq    %gs:CPU_THREAD, %r9
1952         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
1953         jae     4f
1954 
1955         cmpq    $0, %rcx                /* No non-temporal access? */
1956         /*
1957          * pass lofault value as 4th argument to do_copy_fault
1958          */
1959         leaq    _xcopyout_err(%rip), %rcx
1960         jnz     6f
1961         /*
1962          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
1963          */
1964         cmpq    $XCOPY_MIN_SIZE, %rdx
1965         jae     5f
1966 6:
1967         SMAP_DISABLE_INSTR(4)
1968         jmp     do_copy_fault
1969         
1970         /*
1971          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
1972          * count is COUNT_ALIGN_SIZE aligned.
1973          */
1974 5:
1975         movq    %rdi, %r10
1976         orq     %rsi, %r10
1977         andq    $NTA_ALIGN_MASK, %r10
1978         orq     %rdx, %r10
1979         andq    $COUNT_ALIGN_MASK, %r10
1980         jnz     6b      
1981         leaq    _xcopyout_nta_err(%rip), %rcx
1982         SMAP_DISABLE_INSTR(5)
1983         call    do_copy_fault_nta
1984         SMAP_ENABLE_INSTR(5)
1985         ret
1986 
1987 4:
1988         movl    $EFAULT, %eax
1989         jmp     3f
1990 
1991         /*
1992          * A fault during do_copy_fault or do_copy_fault_nta is
1993          * indicated through an errno value in %rax and we iret from the
1994          * trap handler to here.
1995          */
1996 _xcopyout_err:
1997         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1998 _xcopyout_nta_err:
1999         SMAP_ENABLE_INSTR(6)
2000         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2001 3:
2002         movq    T_COPYOPS(%r9), %r8
2003         cmpq    $0, %r8
2004         jz      2f
2005 
2006         /*
2007          * reload args for the copyop
2008          */
2009         movq    (%rsp), %rdi
2010         movq    0x8(%rsp), %rsi
2011         movq    0x10(%rsp), %rdx
2012         leave
2013         jmp     *CP_XCOPYOUT(%r8)
2014 
2015 2:      leave
2016         ret
2017         SET_SIZE(xcopyout_nta)
2018 
2019 #elif defined(__i386)
2020 
2021 #define ARG_KADDR       4
2022 #define ARG_UADDR       8
2023 #define ARG_COUNT       12
2024 #define ARG_CACHED      16
2025 
2026         ENTRY(xcopyout_nta)
2027         movl    kernelbase, %ecx
2028         lea     _xcopyout_err, %eax
2029         movl    %gs:CPU_THREAD, %edx
2030         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2031         jae     4f
2032 
2033         cmpl    $0, use_sse_copy        /* no sse support */
2034         jz      do_copy_fault
2035 
2036         cmpl    $0, ARG_CACHED(%esp)    /* copy_cached hint set? */
2037         jnz     do_copy_fault
2038 
2039         /*
2040          * Make sure cnt is >= XCOPY_MIN_SIZE bytes
2041          */
2042         cmpl    $XCOPY_MIN_SIZE, ARG_COUNT(%esp)
2043         jb      do_copy_fault
2044         
2045         /*
2046          * Make sure src and dst are NTA_ALIGN_SIZE aligned,
2047          * count is COUNT_ALIGN_SIZE aligned.
2048          */
2049         movl    ARG_UADDR(%esp), %ecx
2050         orl     ARG_KADDR(%esp), %ecx
2051         andl    $NTA_ALIGN_MASK, %ecx
2052         orl     ARG_COUNT(%esp), %ecx
2053         andl    $COUNT_ALIGN_MASK, %ecx
2054         jnz     do_copy_fault
2055         jmp     do_copy_fault_nta
2056 
2057 4:
2058         movl    $EFAULT, %eax
2059         jmp     3f
2060 
2061         /*
2062          * A fault during do_copy_fault or do_copy_fault_nta is
2063          * indicated through an errno value in %eax and we iret from the
2064          * trap handler to here.
2065          */
2066 _xcopyout_err:
2067         /* restore the original lofault */
2068         popl    %ecx
2069         popl    %edi
2070         movl    %ecx, T_LOFAULT(%edx)   /* original lofault */
2071         popl    %esi
2072         popl    %ebp
2073 3:
2074         cmpl    $0, T_COPYOPS(%edx)
2075         jz      2f
2076         movl    T_COPYOPS(%edx), %eax
2077         jmp     *CP_XCOPYOUT(%eax)
2078 
2079 2:      rep;    ret     /* use 2 byte return instruction when branch target */
2080                         /* AMD Software Optimization Guide - Section 6.2 */
2081         SET_SIZE(xcopyout_nta)
2082 
2083 #undef  ARG_UADDR
2084 #undef  ARG_KADDR
2085 #undef  ARG_COUNT
2086 #undef  ARG_CACHED
2087 
2088 #endif  /* __i386 */
2089 #endif  /* __lint */
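/*
 * The alignment gate used by both xcopyin_nta() and xcopyout_nta() above
 * packs two tests into one register: because NTA_ALIGN_MASK (3) is a subset
 * of COUNT_ALIGN_MASK (15), OR-ing in the count and masking once more yields
 * zero exactly when src and dst are 4-byte aligned and the count is 16-byte
 * aligned.  A C restatement of that check (a sketch, not code from this
 * file):
 *
 *	static int
 *	nta_aligned(const void *src, const void *dst, size_t count)
 *	{
 *		uintptr_t t;
 *
 *		t = ((uintptr_t)src | (uintptr_t)dst) & NTA_ALIGN_MASK;
 *		t = (t | count) & COUNT_ALIGN_MASK;
 *		return (t == 0);	(t != 0 forces the regular copy path)
 *	}
 */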
2090 
2091 /*
2092  * Copy a null terminated string from one point to another in
2093  * the kernel address space.
2094  */
2095 
2096 #if defined(__lint)
2097 
2098 /* ARGSUSED */
2099 int
2100 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
2101 { return (0); }
2102 
2103 #else   /* __lint */
2104 
2105 #if defined(__amd64)
2106 
2107         ENTRY(copystr)
2108         pushq   %rbp
2109         movq    %rsp, %rbp
2110 #ifdef DEBUG
2111         movq    kernelbase(%rip), %rax
2112         cmpq    %rax, %rdi              /* %rdi = from */
2113         jb      0f
2114         cmpq    %rax, %rsi              /* %rsi = to */
2115         jnb     1f
2116 0:      leaq    .copystr_panic_msg(%rip), %rdi
2117         xorl    %eax, %eax
2118         call    panic
2119 1:
2120 #endif
2121         movq    %gs:CPU_THREAD, %r9
2122         movq    T_LOFAULT(%r9), %r8     /* pass current lofault value as */
2123                                         /* 5th argument to do_copystr */
2124         xorl    %r10d,%r10d             /* pass smap restore need in %r10d */
2125                                         /* as a non-ABI 6th arg */
2126 do_copystr:
2127         movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
2128         movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
2129         movq    %r8, T_LOFAULT(%r9)     /* new lofault */
2130 
2131         movq    %rdx, %r8               /* save maxlength */
2132 
2133         cmpq    $0, %rdx                /* %rdx = maxlength */
2134         je      copystr_enametoolong    /* maxlength == 0 */
2135 
2136 copystr_loop:
2137         decq    %r8
2138         movb    (%rdi), %al
2139         incq    %rdi
2140         movb    %al, (%rsi)
2141         incq    %rsi
2142         cmpb    $0, %al
2143         je      copystr_null            /* null char */
2144         cmpq    $0, %r8
2145         jne     copystr_loop
2146 
2147 copystr_enametoolong:
2148         movl    $ENAMETOOLONG, %eax
2149         jmp     copystr_out
2150 
2151 copystr_null:
2152         xorl    %eax, %eax              /* no error */
2153 
2154 copystr_out:
2155         cmpq    $0, %rcx                /* want length? */
2156         je      copystr_smap            /* no */
2157         subq    %r8, %rdx               /* compute length and store it */
2158         movq    %rdx, (%rcx)
2159 
2160 copystr_smap:
2161         cmpl    $0, %r10d
2162         jz      copystr_done
2163         SMAP_ENABLE_INSTR(7)
2164 
2165 copystr_done:
2166         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2167         leave
2168         ret
2169         SET_SIZE(copystr)
2170 
2171 #elif defined(__i386)
2172 
2173 #define ARG_FROM        8
2174 #define ARG_TO          12
2175 #define ARG_MAXLEN      16
2176 #define ARG_LENCOPIED   20
2177 
2178         ENTRY(copystr)
2179 #ifdef DEBUG
2180         pushl   %ebp
2181         movl    %esp, %ebp
2182         movl    kernelbase, %eax
2183         cmpl    %eax, ARG_FROM(%esp)
2184         jb      0f
2185         cmpl    %eax, ARG_TO(%esp)
2186         jnb     1f
2187 0:      pushl   $.copystr_panic_msg
2188         call    panic
2189 1:      popl    %ebp
2190 #endif
2191         /* get the current lofault address */
2192         movl    %gs:CPU_THREAD, %eax
2193         movl    T_LOFAULT(%eax), %eax
2194 do_copystr:
2195         pushl   %ebp                    /* setup stack frame */
2196         movl    %esp, %ebp
2197         pushl   %ebx                    /* save registers */
2198         pushl   %edi
2199 
2200         movl    %gs:CPU_THREAD, %ebx    
2201         movl    T_LOFAULT(%ebx), %edi
2202         pushl   %edi                    /* save the current lofault */
2203         movl    %eax, T_LOFAULT(%ebx)   /* new lofault */
2204 
2205         movl    ARG_MAXLEN(%ebp), %ecx
2206         cmpl    $0, %ecx
2207         je      copystr_enametoolong    /* maxlength == 0 */
2208 
2209         movl    ARG_FROM(%ebp), %ebx    /* source address */
2210         movl    ARG_TO(%ebp), %edx      /* destination address */
2211 
2212 copystr_loop:
2213         decl    %ecx
2214         movb    (%ebx), %al
2215         incl    %ebx    
2216         movb    %al, (%edx)
2217         incl    %edx
2218         cmpb    $0, %al
2219         je      copystr_null            /* null char */
2220         cmpl    $0, %ecx
2221         jne     copystr_loop
2222 
2223 copystr_enametoolong:
2224         movl    $ENAMETOOLONG, %eax
2225         jmp     copystr_out
2226 
2227 copystr_null:
2228         xorl    %eax, %eax              /* no error */
2229 
2230 copystr_out:
2231         cmpl    $0, ARG_LENCOPIED(%ebp) /* want length? */
2232         je      copystr_done            /* no */
2233         movl    ARG_MAXLEN(%ebp), %edx
2234         subl    %ecx, %edx              /* compute length and store it */
2235         movl    ARG_LENCOPIED(%ebp), %ecx
2236         movl    %edx, (%ecx)
2237 
2238 copystr_done:
2239         popl    %edi
2240         movl    %gs:CPU_THREAD, %ebx    
2241         movl    %edi, T_LOFAULT(%ebx)   /* restore the original lofault */
2242 
2243         popl    %edi
2244         popl    %ebx
2245         popl    %ebp
2246         ret     
2247         SET_SIZE(copystr)
2248 
2249 #undef  ARG_FROM
2250 #undef  ARG_TO
2251 #undef  ARG_MAXLEN
2252 #undef  ARG_LENCOPIED
2253 
2254 #endif  /* __i386 */
2255 #endif  /* __lint */
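/*
 * Usage sketch (not part of this file): copystr() copies at most maxlength
 * bytes and stops after the terminating NUL.  It returns 0 on success or
 * ENAMETOOLONG when maxlength runs out first and, if lencopied is non-NULL,
 * stores the number of bytes copied (including the NUL on success).  The
 * wrapper below is hypothetical.
 *
 *	static int
 *	dup_kernel_name(const char *src, char *dst, size_t dstlen)
 *	{
 *		size_t	copied;
 *		int	err;
 *
 *		err = copystr(src, dst, dstlen, &copied);
 *		if (err != 0)
 *			return (err);		(ENAMETOOLONG: dst too small)
 *		return (0);			(copied counts the NUL byte)
 *	}
 */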
2256 
2257 /*
2258  * Copy a null terminated string from the user address space into
2259  * the kernel address space.
2260  */
2261 
2262 #if defined(__lint)
2263 
2264 /* ARGSUSED */
2265 int
2266 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
2267     size_t *lencopied)
2268 { return (0); }
2269 
2270 #else   /* __lint */
2271 
2272 #if defined(__amd64)
2273 
2274         ENTRY(copyinstr)
2275         pushq   %rbp
2276         movq    %rsp, %rbp
2277         subq    $32, %rsp
2278 
2279         /*
2280          * save args in case we trap and need to rerun as a copyop
2281          */
2282         movq    %rdi, (%rsp)
2283         movq    %rsi, 0x8(%rsp)
2284         movq    %rdx, 0x10(%rsp)
2285         movq    %rcx, 0x18(%rsp)
2286 
2287         movq    kernelbase(%rip), %rax
2288 #ifdef DEBUG
2289         cmpq    %rax, %rsi              /* %rsi = kaddr */
2290         jnb     1f
2291         leaq    .copyinstr_panic_msg(%rip), %rdi
2292         xorl    %eax, %eax
2293         call    panic
2294 1:
2295 #endif
2296         /*
2297          * pass lofault value as 5th argument to do_copystr
2298          * do_copystr expects whether or not we need smap in %r10d
2299          */
2300         leaq    _copyinstr_error(%rip), %r8
2301         movl    $1, %r10d
2302 
2303         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
2304         jae     4f
2305         SMAP_DISABLE_INSTR(6)
2306         jmp     do_copystr
2307 4:
2308         movq    %gs:CPU_THREAD, %r9
2309         jmp     3f
2310 
2311 _copyinstr_error:
2312         SMAP_ENABLE_INSTR(8)
2313         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2314 3:
2315         movq    T_COPYOPS(%r9), %rax
2316         cmpq    $0, %rax
2317         jz      2f
2318 
2319         /*
2320          * reload args for the copyop
2321          */
2322         movq    (%rsp), %rdi
2323         movq    0x8(%rsp), %rsi
2324         movq    0x10(%rsp), %rdx
2325         movq    0x18(%rsp), %rcx
2326         leave
2327         jmp     *CP_COPYINSTR(%rax)
2328         
2329 2:      movl    $EFAULT, %eax           /* return EFAULT */
2330         leave
2331         ret
2332         SET_SIZE(copyinstr)
2333 
2334 #elif defined(__i386)
2335 
2336 #define ARG_UADDR       4
2337 #define ARG_KADDR       8
2338 
2339         ENTRY(copyinstr)
2340         movl    kernelbase, %ecx
2341 #ifdef DEBUG
2342         cmpl    %ecx, ARG_KADDR(%esp)
2343         jnb     1f
2344         pushl   %ebp
2345         movl    %esp, %ebp
2346         pushl   $.copyinstr_panic_msg
2347         call    panic
2348 1:
2349 #endif
2350         lea     _copyinstr_error, %eax
2351         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2352         jb      do_copystr
2353         movl    %gs:CPU_THREAD, %edx
2354         jmp     3f
2355 
2356 _copyinstr_error:
2357         popl    %edi
2358         movl    %gs:CPU_THREAD, %edx    
2359         movl    %edi, T_LOFAULT(%edx)   /* original lofault */
2360 
2361         popl    %edi
2362         popl    %ebx
2363         popl    %ebp
2364 3:
2365         movl    T_COPYOPS(%edx), %eax
2366         cmpl    $0, %eax
2367         jz      2f
2368         jmp     *CP_COPYINSTR(%eax)
2369         
2370 2:      movl    $EFAULT, %eax           /* return EFAULT */
2371         ret
2372         SET_SIZE(copyinstr)
2373 
2374 #undef  ARG_UADDR
2375 #undef  ARG_KADDR
2376 
2377 #endif  /* __i386 */
2378 #endif  /* __lint */
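/*
 * Usage sketch (not part of this file): copyinstr() is the user-to-kernel
 * string variant.  It returns EFAULT for a bad or out-of-range user address,
 * ENAMETOOLONG when the string does not fit in maxlength bytes, and 0
 * otherwise.  Pulling a user-supplied path into a kernel buffer is the usual
 * pattern; the names below are hypothetical.
 *
 *	static int
 *	get_user_path(const char *upath, char *kpath, size_t kpathlen)
 *	{
 *		size_t	len;
 *		int	err;
 *
 *		err = copyinstr(upath, kpath, kpathlen, &len);
 *		if (err != 0)
 *			return (err);		(EFAULT or ENAMETOOLONG)
 *		return (0);			(kpath is NUL-terminated)
 *	}
 */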
2379 
2380 /*
2381  * Copy a null terminated string from the kernel
2382  * address space to the user address space.
2383  */
2384 
2385 #if defined(__lint)
2386 
2387 /* ARGSUSED */
2388 int
2389 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
2390     size_t *lencopied)
2391 { return (0); }
2392 
2393 #else   /* __lint */
2394 
2395 #if defined(__amd64)
2396 
2397         ENTRY(copyoutstr)
2398         pushq   %rbp
2399         movq    %rsp, %rbp
2400         subq    $32, %rsp
2401 
2402         /*
2403          * save args in case we trap and need to rerun as a copyop
2404          */
2405         movq    %rdi, (%rsp)
2406         movq    %rsi, 0x8(%rsp)
2407         movq    %rdx, 0x10(%rsp)
2408         movq    %rcx, 0x18(%rsp)
2409 
2410         movq    kernelbase(%rip), %rax
2411 #ifdef DEBUG
2412         cmpq    %rax, %rdi              /* %rdi = kaddr */
2413         jnb     1f
2414         leaq    .copyoutstr_panic_msg(%rip), %rdi
2415         jmp     call_panic              /* setup stack and call panic */
2416 1:
2417 #endif
2418         /*
2419          * pass lofault value as 5th argument to do_copystr
2420          * pass one as 6th argument to do_copystr in %r10d
2421          */
2422         leaq    _copyoutstr_error(%rip), %r8
2423         movl    $1, %r10d
2424 
2425         cmpq    %rax, %rsi              /* test uaddr < kernelbase */
2426         jae     4f
2427         SMAP_DISABLE_INSTR(7)
2428         jmp     do_copystr
2429 4:
2430         movq    %gs:CPU_THREAD, %r9
2431         jmp     3f
2432 
2433 _copyoutstr_error:
2434         SMAP_ENABLE_INSTR(9)
2435         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2436 3:
2437         movq    T_COPYOPS(%r9), %rax
2438         cmpq    $0, %rax
2439         jz      2f
2440 
2441         /*
2442          * reload args for the copyop
2443          */
2444         movq    (%rsp), %rdi
2445         movq    0x8(%rsp), %rsi
2446         movq    0x10(%rsp), %rdx
2447         movq    0x18(%rsp), %rcx
2448         leave
2449         jmp     *CP_COPYOUTSTR(%rax)
2450         
2451 2:      movl    $EFAULT, %eax           /* return EFAULT */
2452         leave
2453         ret
2454         SET_SIZE(copyoutstr)    
2455         
2456 #elif defined(__i386)
2457 
2458 #define ARG_KADDR       4
2459 #define ARG_UADDR       8
2460 
2461         ENTRY(copyoutstr)
2462         movl    kernelbase, %ecx
2463 #ifdef DEBUG
2464         cmpl    %ecx, ARG_KADDR(%esp)
2465         jnb     1f
2466         pushl   %ebp
2467         movl    %esp, %ebp
2468         pushl   $.copyoutstr_panic_msg
2469         call    panic
2470 1:
2471 #endif
2472         lea     _copyoutstr_error, %eax
2473         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2474         jb      do_copystr
2475         movl    %gs:CPU_THREAD, %edx
2476         jmp     3f
2477 
2478 _copyoutstr_error:
2479         popl    %edi
2480         movl    %gs:CPU_THREAD, %edx    
2481         movl    %edi, T_LOFAULT(%edx)   /* restore the original lofault */
2482 
2483         popl    %edi
2484         popl    %ebx
2485         popl    %ebp
2486 3:
2487         movl    T_COPYOPS(%edx), %eax
2488         cmpl    $0, %eax
2489         jz      2f
2490         jmp     *CP_COPYOUTSTR(%eax)
2491 
2492 2:      movl    $EFAULT, %eax           /* return EFAULT */
2493         ret
2494         SET_SIZE(copyoutstr)
2495         
2496 #undef  ARG_KADDR
2497 #undef  ARG_UADDR
2498 
2499 #endif  /* __i386 */
2500 #endif  /* __lint */
2501 
2502 /*
2503  * Since all of the fuword() variants are so similar, we have a macro to spit
2504  * them out.  This allows us to create DTrace-unobservable functions easily.
2505  */
2506         
2507 #if defined(__lint)
2508 
2509 #if defined(__amd64)
2510 
2511 /* ARGSUSED */
2512 int
2513 fuword64(const void *addr, uint64_t *dst)
2514 { return (0); }
2515 
2516 #endif
2517 
2518 /* ARGSUSED */
2519 int
2520 fuword32(const void *addr, uint32_t *dst)
2521 { return (0); }
2522 
2523 /* ARGSUSED */
2524 int
2525 fuword16(const void *addr, uint16_t *dst)
2526 { return (0); }
2527 
2528 /* ARGSUSED */
2529 int
2530 fuword8(const void *addr, uint8_t *dst)
2531 { return (0); }
2532 
2533 #else   /* __lint */
2534 
2535 #if defined(__amd64)
2536 
2537 /*
2538  * Note that we don't save and reload the arguments here
2539  * because their values are not altered in the copy path.
2540  * Additionally, when successful, the smap_enable jmp will
2541  * actually return us to our original caller.
2542  */
2543 
2544 #define FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2545         ENTRY(NAME)                             \
2546         movq    %gs:CPU_THREAD, %r9;            \
2547         cmpq    kernelbase(%rip), %rdi;         \
2548         jae     1f;                             \
2549         leaq    _flt_/**/NAME, %rdx;            \
2550         movq    %rdx, T_LOFAULT(%r9);           \
2551         SMAP_DISABLE_INSTR(DISNUM)              \
2552         INSTR   (%rdi), REG;                    \
2553         movq    $0, T_LOFAULT(%r9);             \
2554         INSTR   REG, (%rsi);                    \
2555         xorl    %eax, %eax;                     \
2556         SMAP_ENABLE_INSTR(EN1)                  \
2557         ret;                                    \
2558 _flt_/**/NAME:                                  \
2559         SMAP_ENABLE_INSTR(EN2)                  \
2560         movq    $0, T_LOFAULT(%r9);             \
2561 1:                                              \
2562         movq    T_COPYOPS(%r9), %rax;           \
2563         cmpq    $0, %rax;                       \
2564         jz      2f;                             \
2565         jmp     *COPYOP(%rax);                  \
2566 2:                                              \
2567         movl    $-1, %eax;                      \
2568         ret;                                    \
2569         SET_SIZE(NAME)
2570         
2571         FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572         FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573         FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574         FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2575 
2576 #elif defined(__i386)
2577 
2578 #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2579         ENTRY(NAME)                             \
2580         movl    %gs:CPU_THREAD, %ecx;           \
2581         movl    kernelbase, %eax;               \
2582         cmpl    %eax, 4(%esp);                  \
2583         jae     1f;                             \
2584         lea     _flt_/**/NAME, %edx;            \
2585         movl    %edx, T_LOFAULT(%ecx);          \
2586         movl    4(%esp), %eax;                  \
2587         movl    8(%esp), %edx;                  \
2588         INSTR   (%eax), REG;                    \
2589         movl    $0, T_LOFAULT(%ecx);            \
2590         INSTR   REG, (%edx);                    \
2591         xorl    %eax, %eax;                     \
2592         ret;                                    \
2593 _flt_/**/NAME:                                  \
2594         movl    $0, T_LOFAULT(%ecx);            \
2595 1:                                              \
2596         movl    T_COPYOPS(%ecx), %eax;          \
2597         cmpl    $0, %eax;                       \
2598         jz      2f;                             \
2599         jmp     *COPYOP(%eax);                  \
2600 2:                                              \
2601         movl    $-1, %eax;                      \
2602         ret;                                    \
2603         SET_SIZE(NAME)
2604 
2605         FUWORD(fuword32, movl, %eax, CP_FUWORD32)
2606         FUWORD(fuword16, movw, %ax, CP_FUWORD16)
2607         FUWORD(fuword8, movb, %al, CP_FUWORD8)
2608 
2609 #endif  /* __i386 */
2610 
2611 #undef  FUWORD
2612 
2613 #endif  /* __lint */
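/*
 * Usage sketch (not part of this file): each fuword variant fetches one
 * naturally-sized item from a user address into a kernel location, returning
 * 0 on success and -1 when the address faults or is not a user address
 * (unless a copyops vector takes over).  The names below are hypothetical.
 *
 *	static int
 *	read_user_flag(const void *uaddr, uint32_t *flagp)
 *	{
 *		uint32_t val;
 *
 *		if (fuword32(uaddr, &val) == -1)
 *			return (EFAULT);
 *		*flagp = val;
 *		return (0);
 *	}
 */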
2614 
2615 /*
2616  * Set user word.
2617  */
2618 
2619 #if defined(__lint)
2620 
2621 #if defined(__amd64)
2622 
2623 /* ARGSUSED */
2624 int
2625 suword64(void *addr, uint64_t value)
2626 { return (0); }
2627 
2628 #endif
2629 
2630 /* ARGSUSED */
2631 int
2632 suword32(void *addr, uint32_t value)
2633 { return (0); }
2634 
2635 /* ARGSUSED */
2636 int
2637 suword16(void *addr, uint16_t value)
2638 { return (0); }
2639 
2640 /* ARGSUSED */
2641 int
2642 suword8(void *addr, uint8_t value)
2643 { return (0); }
2644 
2645 #else   /* lint */
2646 
2647 #if defined(__amd64)
2648 
2649 /*
2650  * Note that we don't save and reload the arguments here
2651  * because their values are not altered in the copy path.
2652  */
2653 
2654 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2655         ENTRY(NAME)                             \
2656         movq    %gs:CPU_THREAD, %r9;            \
2657         cmpq    kernelbase(%rip), %rdi;         \
2658         jae     1f;                             \
2659         leaq    _flt_/**/NAME, %rdx;            \
2660         SMAP_DISABLE_INSTR(DISNUM)              \
2661         movq    %rdx, T_LOFAULT(%r9);           \
2662         INSTR   REG, (%rdi);                    \
2663         movq    $0, T_LOFAULT(%r9);             \
2664         xorl    %eax, %eax;                     \
2665         SMAP_ENABLE_INSTR(EN1)                  \
2666         ret;                                    \
2667 _flt_/**/NAME:                                  \
2668         SMAP_ENABLE_INSTR(EN2)                  \
2669         movq    $0, T_LOFAULT(%r9);             \
2670 1:                                              \
2671         movq    T_COPYOPS(%r9), %rax;           \
2672         cmpq    $0, %rax;                       \
2673         jz      3f;                             \
2674         jmp     *COPYOP(%rax);                  \
2675 3:                                              \
2676         movl    $-1, %eax;                      \
2677         ret;                                    \
2678         SET_SIZE(NAME)
2679 
2680         SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681         SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682         SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683         SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2684 
2685 #elif defined(__i386)
2686 
2687 #define SUWORD(NAME, INSTR, REG, COPYOP)        \
2688         ENTRY(NAME)                             \
2689         movl    %gs:CPU_THREAD, %ecx;           \
2690         movl    kernelbase, %eax;               \
2691         cmpl    %eax, 4(%esp);                  \
2692         jae     1f;                             \
2693         lea     _flt_/**/NAME, %edx;            \
2694         movl    %edx, T_LOFAULT(%ecx);          \
2695         movl    4(%esp), %eax;                  \
2696         movl    8(%esp), %edx;                  \
2697         INSTR   REG, (%eax);                    \
2698         movl    $0, T_LOFAULT(%ecx);            \
2699         xorl    %eax, %eax;                     \
2700         ret;                                    \
2701 _flt_/**/NAME:                                  \
2702         movl    $0, T_LOFAULT(%ecx);            \
2703 1:                                              \
2704         movl    T_COPYOPS(%ecx), %eax;          \
2705         cmpl    $0, %eax;                       \
2706         jz      3f;                             \
2707         movl    COPYOP(%eax), %ecx;             \
2708         jmp     *%ecx;                          \
2709 3:                                              \
2710         movl    $-1, %eax;                      \
2711         ret;                                    \
2712         SET_SIZE(NAME)
2713 
2714         SUWORD(suword32, movl, %edx, CP_SUWORD32)
2715         SUWORD(suword16, movw, %dx, CP_SUWORD16)
2716         SUWORD(suword8, movb, %dl, CP_SUWORD8)
2717 
2718 #endif  /* __i386 */
2719 
2720 #undef  SUWORD
2721 
2722 #endif  /* __lint */
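/*
 * Usage sketch (not part of this file): the suword variants are the stores
 * that mirror the fuword fetches above -- one naturally-sized write to a user
 * address, 0 on success and -1 on failure.  Writing a completion code back
 * through a user-supplied pointer is typical; the wrapper is hypothetical.
 *
 *	static int
 *	post_result(void *ures, uint32_t result)
 *	{
 *		if (suword32(ures, result) == -1)
 *			return (EFAULT);
 *		return (0);
 *	}
 */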
2723 
2724 #if defined(__lint)
2725 
2726 #if defined(__amd64)
2727 
2728 /*ARGSUSED*/
2729 void
2730 fuword64_noerr(const void *addr, uint64_t *dst)
2731 {}
2732 
2733 #endif
2734 
2735 /*ARGSUSED*/
2736 void
2737 fuword32_noerr(const void *addr, uint32_t *dst)
2738 {}
2739 
2740 /*ARGSUSED*/
2741 void
2742 fuword8_noerr(const void *addr, uint8_t *dst)
2743 {}
2744 
2745 /*ARGSUSED*/
2746 void
2747 fuword16_noerr(const void *addr, uint16_t *dst)
2748 {}
2749 
2750 #else   /* __lint */
2751 
2752 #if defined(__amd64)
2753 
2754 #define FUWORD_NOERR(NAME, INSTR, REG)          \
2755         ENTRY(NAME)                             \
2756         cmpq    kernelbase(%rip), %rdi;         \
2757         cmovnbq kernelbase(%rip), %rdi;         \
2758         INSTR   (%rdi), REG;                    \
2759         INSTR   REG, (%rsi);                    \
2760         ret;                                    \
2761         SET_SIZE(NAME)
2762 
2763         FUWORD_NOERR(fuword64_noerr, movq, %rax)
2764         FUWORD_NOERR(fuword32_noerr, movl, %eax)
2765         FUWORD_NOERR(fuword16_noerr, movw, %ax)
2766         FUWORD_NOERR(fuword8_noerr, movb, %al)
2767 
2768 #elif defined(__i386)
2769 
2770 #define FUWORD_NOERR(NAME, INSTR, REG)          \
2771         ENTRY(NAME)                             \
2772         movl    4(%esp), %eax;                  \
2773         cmpl    kernelbase, %eax;               \
2774         jb      1f;                             \
2775         movl    kernelbase, %eax;               \
2776 1:      movl    8(%esp), %edx;                  \
2777         INSTR   (%eax), REG;                    \
2778         INSTR   REG, (%edx);                    \
2779         ret;                                    \
2780         SET_SIZE(NAME)
2781 
2782         FUWORD_NOERR(fuword32_noerr, movl, %ecx)
2783         FUWORD_NOERR(fuword16_noerr, movw, %cx)
2784         FUWORD_NOERR(fuword8_noerr, movb, %cl)
2785 
2786 #endif  /* __i386 */
2787 
2788 #undef  FUWORD_NOERR
2789 
2790 #endif  /* __lint */
2791 
2792 #if defined(__lint)
2793 
2794 #if defined(__amd64)
2795 
2796 /*ARGSUSED*/
2797 void
2798 suword64_noerr(void *addr, uint64_t value)
2799 {}
2800 
2801 #endif
2802 
2803 /*ARGSUSED*/
2804 void
2805 suword32_noerr(void *addr, uint32_t value)
2806 {}
2807 
2808 /*ARGSUSED*/
2809 void
2810 suword16_noerr(void *addr, uint16_t value)
2811 {}
2812 
2813 /*ARGSUSED*/
2814 void
2815 suword8_noerr(void *addr, uint8_t value)
2816 {}
2817 
2818 #else   /* lint */
2819 
2820 #if defined(__amd64)
2821 
2822 #define SUWORD_NOERR(NAME, INSTR, REG)          \
2823         ENTRY(NAME)                             \
2824         cmpq    kernelbase(%rip), %rdi;         \
2825         cmovnbq kernelbase(%rip), %rdi;         \
2826         INSTR   REG, (%rdi);                    \
2827         ret;                                    \
2828         SET_SIZE(NAME)
2829 
2830         SUWORD_NOERR(suword64_noerr, movq, %rsi)
2831         SUWORD_NOERR(suword32_noerr, movl, %esi)
2832         SUWORD_NOERR(suword16_noerr, movw, %si)
2833         SUWORD_NOERR(suword8_noerr, movb, %sil)
2834 
2835 #elif defined(__i386)
2836 
2837 #define SUWORD_NOERR(NAME, INSTR, REG)          \
2838         ENTRY(NAME)                             \
2839         movl    4(%esp), %eax;                  \
2840         cmpl    kernelbase, %eax;               \
2841         jb      1f;                             \
2842         movl    kernelbase, %eax;               \
2843 1:                                              \
2844         movl    8(%esp), %edx;                  \
2845         INSTR   REG, (%eax);                    \
2846         ret;                                    \
2847         SET_SIZE(NAME)
2848 
2849         SUWORD_NOERR(suword32_noerr, movl, %edx)
2850         SUWORD_NOERR(suword16_noerr, movw, %dx)
2851         SUWORD_NOERR(suword8_noerr, movb, %dl)
2852 
2853 #endif  /* __i386 */
2854 
2855 #undef  SUWORD_NOERR
2856 
2857 #endif  /* lint */
2858 
2859 
2860 #if defined(__lint)
2861 
2862 /*ARGSUSED*/
2863 int
2864 subyte(void *addr, uchar_t value)
2865 { return (0); }
2866 
2867 /*ARGSUSED*/
2868 void
2869 subyte_noerr(void *addr, uchar_t value)
2870 {}
2871 
2872 /*ARGSUSED*/
2873 int
2874 fulword(const void *addr, ulong_t *valuep)
2875 { return (0); }
2876 
2877 /*ARGSUSED*/
2878 void
2879 fulword_noerr(const void *addr, ulong_t *valuep)
2880 {}
2881 
2882 /*ARGSUSED*/
2883 int
2884 sulword(void *addr, ulong_t valuep)
2885 { return (0); }
2886 
2887 /*ARGSUSED*/
2888 void
2889 sulword_noerr(void *addr, ulong_t valuep)
2890 {}
2891 
2892 #else
2893 
2894         .weak   subyte
2895         subyte=suword8
2896         .weak   subyte_noerr
2897         subyte_noerr=suword8_noerr
2898 
2899 #if defined(__amd64)
2900 
2901         .weak   fulword
2902         fulword=fuword64
2903         .weak   fulword_noerr
2904         fulword_noerr=fuword64_noerr
2905         .weak   sulword
2906         sulword=suword64
2907         .weak   sulword_noerr
2908         sulword_noerr=suword64_noerr
2909 
2910 #elif defined(__i386)
2911 
2912         .weak   fulword
2913         fulword=fuword32
2914         .weak   fulword_noerr
2915         fulword_noerr=fuword32_noerr
2916         .weak   sulword
2917         sulword=suword32
2918         .weak   sulword_noerr
2919         sulword_noerr=suword32_noerr
2920 
2921 #endif /* __i386 */
2922 
2923 #endif /* __lint */
2924 
2925 #if defined(__lint)
2926 
2927 /*
2928  * Copy a block of storage - must not overlap (from + len <= to).
2929  * No fault handler installed (to be called under on_fault())
2930  */
2931 
2932 /* ARGSUSED */
2933 void
2934 copyout_noerr(const void *kfrom, void *uto, size_t count)
2935 {}
2936 
2937 /* ARGSUSED */
2938 void
2939 copyin_noerr(const void *ufrom, void *kto, size_t count)
2940 {}
2941 
2942 /*
2943  * Zero a block of storage in user space
2944  */
2945 
2946 /* ARGSUSED */
2947 void
2948 uzero(void *addr, size_t count)
2949 {}
2950 
2951 /*
2952  * copy a block of storage in user space
2953  */
2954 
2955 /* ARGSUSED */
2956 void
2957 ucopy(const void *ufrom, void *uto, size_t ulength)
2958 {}
2959 
2960 /*
2961  * copy a string in user space
2962  */
2963 
2964 /* ARGSUSED */
2965 void
2966 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
2967 {}
2968 
2969 #else /* __lint */
2970 
2971 #if defined(__amd64)
2972 
2973         ENTRY(copyin_noerr)
2974         movq    kernelbase(%rip), %rax
2975 #ifdef DEBUG
2976         cmpq    %rax, %rsi              /* %rsi = kto */
2977         jae     1f
2978         leaq    .cpyin_ne_pmsg(%rip), %rdi
2979         jmp     call_panic              /* setup stack and call panic */
2980 1:
2981 #endif
2982         cmpq    %rax, %rdi              /* ufrom < kernelbase */
2983         jb      do_copy
2984         movq    %rax, %rdi              /* force fault at kernelbase */
2985         jmp     do_copy
2986         SET_SIZE(copyin_noerr)
2987 
2988         ENTRY(copyout_noerr)
2989         movq    kernelbase(%rip), %rax
2990 #ifdef DEBUG
2991         cmpq    %rax, %rdi              /* %rdi = kfrom */
2992         jae     1f
2993         leaq    .cpyout_ne_pmsg(%rip), %rdi
2994         jmp     call_panic              /* setup stack and call panic */
2995 1:
2996 #endif
2997         cmpq    %rax, %rsi              /* uto < kernelbase */
2998         jb      do_copy
2999         movq    %rax, %rsi              /* force fault at kernelbase */
3000         jmp     do_copy
3001         SET_SIZE(copyout_noerr)
3002 
3003         ENTRY(uzero)
3004         movq    kernelbase(%rip), %rax
3005         cmpq    %rax, %rdi
3006         jb      do_zero
3007         movq    %rax, %rdi      /* force fault at kernelbase */
3008         jmp     do_zero
3009         SET_SIZE(uzero)
3010 
3011         ENTRY(ucopy)
3012         movq    kernelbase(%rip), %rax
3013         cmpq    %rax, %rdi
3014         cmovaeq %rax, %rdi      /* force fault at kernelbase */
3015         cmpq    %rax, %rsi
3016         cmovaeq %rax, %rsi      /* force fault at kernelbase */
3017         jmp     do_copy
3018         SET_SIZE(ucopy)
3019 
3020         /*
3021          * Note, the frame pointer is required here because do_copystr expects
3022          * to be able to pop it off!
3023          */
3024         ENTRY(ucopystr)
3025         pushq   %rbp
3026         movq    %rsp, %rbp
3027         movq    kernelbase(%rip), %rax
3028         cmpq    %rax, %rdi
3029         cmovaeq %rax, %rdi      /* force fault at kernelbase */
3030         cmpq    %rax, %rsi
3031         cmovaeq %rax, %rsi      /* force fault at kernelbase */
3032         /* do_copystr expects lofault address in %r8 */
3033         /* do_copystr expects whether or not we need smap in %r10 */
3034         xorl    %r10d, %r10d
3035         movq    %gs:CPU_THREAD, %r8
3036         movq    T_LOFAULT(%r8), %r8
3037         jmp     do_copystr
3038         SET_SIZE(ucopystr)
3039 
3040 #elif defined(__i386)
3041 
3042         ENTRY(copyin_noerr)
3043         movl    kernelbase, %eax
3044 #ifdef DEBUG
3045         cmpl    %eax, 8(%esp)
3046         jae     1f
3047         pushl   $.cpyin_ne_pmsg
3048         call    panic
3049 1:
3050 #endif
3051         cmpl    %eax, 4(%esp)
3052         jb      do_copy
3053         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3054         jmp     do_copy
3055         SET_SIZE(copyin_noerr)
3056 
3057         ENTRY(copyout_noerr)
3058         movl    kernelbase, %eax
3059 #ifdef DEBUG
3060         cmpl    %eax, 4(%esp)
3061         jae     1f
3062         pushl   $.cpyout_ne_pmsg
3063         call    panic
3064 1:
3065 #endif
3066         cmpl    %eax, 8(%esp)
3067         jb      do_copy
3068         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3069         jmp     do_copy
3070         SET_SIZE(copyout_noerr)
3071 
3072         ENTRY(uzero)
3073         movl    kernelbase, %eax
3074         cmpl    %eax, 4(%esp)
3075         jb      do_zero
3076         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3077         jmp     do_zero
3078         SET_SIZE(uzero)
3079 
3080         ENTRY(ucopy)
3081         movl    kernelbase, %eax
3082         cmpl    %eax, 4(%esp)
3083         jb      1f
3084         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3085 1:
3086         cmpl    %eax, 8(%esp)
3087         jb      do_copy
3088         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3089         jmp     do_copy
3090         SET_SIZE(ucopy)
3091 
3092         ENTRY(ucopystr)
3093         movl    kernelbase, %eax
3094         cmpl    %eax, 4(%esp)
3095         jb      1f
3096         movl    %eax, 4(%esp)   /* force fault at kernelbase */
3097 1:
3098         cmpl    %eax, 8(%esp)
3099         jb      2f
3100         movl    %eax, 8(%esp)   /* force fault at kernelbase */
3101 2:
3102         /* do_copystr expects the lofault address in %eax */
3103         movl    %gs:CPU_THREAD, %eax
3104         movl    T_LOFAULT(%eax), %eax
3105         jmp     do_copystr
3106         SET_SIZE(ucopystr)
3107 
3108 #endif  /* __i386 */
3109 
3110 #ifdef DEBUG
3111         .data
3112 .kcopy_panic_msg:
3113         .string "kcopy: arguments below kernelbase"
3114 .bcopy_panic_msg:
3115         .string "bcopy: arguments below kernelbase"
3116 .kzero_panic_msg:
3117         .string "kzero: arguments below kernelbase"
3118 .bzero_panic_msg:
3119         .string "bzero: arguments below kernelbase"
3120 .copyin_panic_msg:
3121         .string "copyin: kaddr argument below kernelbase"
3122 .xcopyin_panic_msg:
3123         .string "xcopyin: kaddr argument below kernelbase"
3124 .copyout_panic_msg:
3125         .string "copyout: kaddr argument below kernelbase"
3126 .xcopyout_panic_msg:
3127         .string "xcopyout: kaddr argument below kernelbase"
3128 .copystr_panic_msg:
3129         .string "copystr: arguments in user space"
3130 .copyinstr_panic_msg:
3131         .string "copyinstr: kaddr argument not in kernel address space"
3132 .copyoutstr_panic_msg:
3133         .string "copyoutstr: kaddr argument not in kernel address space"
3134 .cpyin_ne_pmsg:
3135         .string "copyin_noerr: argument not in kernel address space"
3136 .cpyout_ne_pmsg:
3137         .string "copyout_noerr: argument not in kernel address space"
3138 #endif
3139 
3140 #endif  /* __lint */
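/*
 * Usage sketch (not part of this file): the _noerr and u* routines above
 * install no fault handler of their own, so callers are expected to bracket
 * them with on_fault()/no_fault().  The wrapper below is hypothetical and
 * only illustrates that pattern.
 *
 *	static int
 *	fetch_under_on_fault(const void *ubuf, void *kbuf, size_t len)
 *	{
 *		label_t	ljb;
 *
 *		if (on_fault(&ljb)) {
 *			no_fault();		(arrived here via a fault)
 *			return (EFAULT);
 *		}
 *		copyin_noerr(ubuf, kbuf, len);
 *		no_fault();
 *		return (0);
 *	}
 */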
3141 
3142 /*
3143  * These functions are used for SMAP, supervisor mode access protection. They
3144  * are hotpatched into real instructions when the system starts up; this is
3145  * done in mlsetup() as part of enabling the other CR4-related features.
3146  *
3147  * Generally speaking, smap_disable() is a stac instruction and smap_enable()
3148  * is a clac instruction. It's safe to call these any number of times, and in
3149  * fact, out of paranoia, the kernel will likely call them at several points.
3150  */
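/*
 * For reference, the three nops in each routine below leave exactly enough
 * room for the 3-byte instructions that get patched in: clac is 0f 01 ca and
 * stac is 0f 01 cb.  A sketch of the patch bytes (the arrays are hypothetical
 * and only the encodings are taken as given; the actual patching is done by
 * the startup code):
 *
 *	static const uint8_t clac_instr[3] = { 0x0f, 0x01, 0xca };
 *	static const uint8_t stac_instr[3] = { 0x0f, 0x01, 0xcb };
 *
 *	(smap_enable() sites receive clac_instr, smap_disable() sites receive
 *	 stac_instr, written over the nops at startup)
 */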
3151 
3152 #if defined(__lint)
3153 
3154 void
3155 smap_enable(void)
3156 {}
3157 
3158 void
3159 smap_disable(void)
3160 {}
3161 
3162 #else
3163 
3164 #if defined (__amd64) || defined(__i386)
3165         ENTRY(smap_disable)
3166         nop
3167         nop
3168         nop
3169         ret
3170         SET_SIZE(smap_disable)
3171 
3172         ENTRY(smap_enable)
3173         nop
3174         nop
3175         nop
3176         ret
3177         SET_SIZE(smap_enable)
3178 
3179 #endif /* __amd64 || __i386 */
3180 
3181 #endif /* __lint */
3182 
3183 #ifndef __lint
3184 
3185 .data
3186 .align  4
3187 .globl  _smap_enable_patch_count
3188 .type   _smap_enable_patch_count,@object
3189 .size   _smap_enable_patch_count, 4
3190 _smap_enable_patch_count:
3191         .long   SMAP_ENABLE_COUNT
3192 
3193 .globl  _smap_disable_patch_count
3194 .type   _smap_disable_patch_count,@object
3195 .size   _smap_disable_patch_count, 4
3196 _smap_disable_patch_count:
3197         .long SMAP_DISABLE_COUNT
3198 
3199 #endif /* __lint */