11787 Kernel needs to be built with retpolines
11788 Kernel needs to generally use RSB stuffing
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
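
This webrev updates the lowest-level kernel copy routines so that every
register-indirect branch goes through the INDIRECT_JMP_REG() macro instead of
a bare "jmpq *%reg" or "jmp *OFFSET(%reg)". With the kernel built with
retpolines (issue 11787), those macros route the transfer through a
per-register thunk, closing the Spectre v2 (branch target injection) window
that a raw indirect jump leaves open; the companion RSB-stuffing work (issue
11788) keeps the return-stack predictions that the thunks depend on in a safe
state. The original file is listed first, followed by the updated file.

As a rough illustration only -- the thunk name and layout below are the
conventional retpoline construction, not code taken from this webrev -- a
per-register thunk looks like this:

        /*
         * Sketch of a retpoline thunk for %r10 (name assumed). Callers
         * jump here instead of executing "jmpq *%r10". Speculative
         * execution is captured in the pause/lfence loop, while the
         * architectural path rewrites the return address and "returns"
         * to the real target.
         */
__x86_indirect_thunk_r10:
        call    2f                      /* push address of the capture loop */
1:
        pause                           /* speculation lands here ... */
        lfence                          /* ... and is fenced off */
        jmp     1b
2:
        movq    %r10, (%rsp)            /* overwrite return addr with target */
        ret                             /* architectural jump to *%r10 */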


  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright (c) 2018 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #if defined(__lint)
  46 #include <sys/types.h>
  47 #include <sys/systm.h>
  48 #else   /* __lint */
  49 #include "assym.h"
  50 #endif  /* __lint */
  51 
  52 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  53 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  54 /*
  55  * Non-temporal access (NTA) alignment requirement
  56  */
  57 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  58 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  59 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */


 465          * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466          * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467          * uses these registers in future they must be saved and restored.
 468          */
 469         ALTENTRY(bcopy_altentry)
 470 do_copy:
 471 #define L(s) .bcopy/**/s
 472         cmpq    $0x50, %rdx             /* 80 */
 473         jae     bcopy_ck_size
 474 
 475         /*
 476          * Performance data shows many callers copy small buffers. So for
 477          * best perf for these sizes unrolled code is used. Store data without
 478          * worrying about alignment.
 479          */
 480         leaq    L(fwdPxQx)(%rip), %r10
 481         addq    %rdx, %rdi
 482         addq    %rdx, %rsi
 483         movslq  (%r10,%rdx,4), %rcx
 484         leaq    (%rcx,%r10,1), %r10
 485         jmpq    *%r10
 486 
 487         .p2align 4
 488 L(fwdPxQx):
 489         .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490         .int       L(P1Q0)-L(fwdPxQx)
 491         .int       L(P2Q0)-L(fwdPxQx)
 492         .int       L(P3Q0)-L(fwdPxQx)
 493         .int       L(P4Q0)-L(fwdPxQx)
 494         .int       L(P5Q0)-L(fwdPxQx)
 495         .int       L(P6Q0)-L(fwdPxQx)
 496         .int       L(P7Q0)-L(fwdPxQx) 
 497 
 498         .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499         .int       L(P1Q1)-L(fwdPxQx)
 500         .int       L(P2Q1)-L(fwdPxQx)
 501         .int       L(P3Q1)-L(fwdPxQx)
 502         .int       L(P4Q1)-L(fwdPxQx)
 503         .int       L(P5Q1)-L(fwdPxQx)
 504         .int       L(P6Q1)-L(fwdPxQx)
 505         .int       L(P7Q1)-L(fwdPxQx) 


 921         mov     0x28(%rdi), %rcx
 922         mov     %r10, 0x20(%rsi)
 923         mov     %rcx, 0x28(%rsi)
 924         mov     0x30(%rdi), %r8
 925         mov     0x38(%rdi), %r10
 926         lea     0x40(%rdi), %rdi
 927         mov     %r8, 0x30(%rsi)
 928         mov     %r10, 0x38(%rsi)
 929         lea     0x40(%rsi), %rsi
 930         jae     L(aligned_loop)
 931 
 932         /*
 933          * Copy remaining bytes (0-63)
 934          */
 935 L(do_remainder):
 936         leaq    L(fwdPxQx)(%rip), %r10
 937         addq    %rdx, %rdi
 938         addq    %rdx, %rsi
 939         movslq  (%r10,%rdx,4), %rcx
 940         leaq    (%rcx,%r10,1), %r10
 941         jmpq    *%r10
 942 
 943         /*
 944          * Use rep smovq. Clear remainder via unrolled code
 945          */
 946         .p2align 4
 947 L(use_rep):
 948         xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949         movq    %rdx, %rcx              /* %rcx = count */
 950         shrq    $3, %rcx                /* 8-byte word count */
 951         rep
 952           smovq
 953 
 954         xchgq   %rsi, %rdi              /* %rdi = src, %rsi = destination */
 955         andq    $7, %rdx                /* remainder */
 956         jnz     L(do_remainder)
 957         ret
 958 #undef  L
 959         SET_SIZE(bcopy_ck_size)
 960 
 961 #ifdef DEBUG


1153         jmp     call_panic              /* setup stack and call panic */
1154 0:
1155 #endif
1156         ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159         xorl    %eax, %eax
1160 
1161         cmpq    $0x50, %rsi             /* 80 */
1162         jae     L(ck_align)
1163 
1164         /*
1165          * Performance data shows many callers are zeroing small buffers. So
1166          * for best perf for these sizes unrolled code is used. Store zeros
1167          * without worrying about alignment.
1168          */
1169         leaq    L(setPxQx)(%rip), %r10
1170         addq    %rsi, %rdi
1171         movslq  (%r10,%rsi,4), %rcx
1172         leaq    (%rcx,%r10,1), %r10
1173         jmpq    *%r10
1174 
1175         .p2align 4
1176 L(setPxQx):
1177         .int       L(P0Q0)-L(setPxQx)   /* 0 */
1178         .int       L(P1Q0)-L(setPxQx)
1179         .int       L(P2Q0)-L(setPxQx)
1180         .int       L(P3Q0)-L(setPxQx)
1181         .int       L(P4Q0)-L(setPxQx)
1182         .int       L(P5Q0)-L(setPxQx)
1183         .int       L(P6Q0)-L(setPxQx)
1184         .int       L(P7Q0)-L(setPxQx) 
1185 
1186         .int       L(P0Q1)-L(setPxQx)   /* 8 */
1187         .int       L(P1Q1)-L(setPxQx)
1188         .int       L(P2Q1)-L(setPxQx)
1189         .int       L(P3Q1)-L(setPxQx)
1190         .int       L(P4Q1)-L(setPxQx)
1191         .int       L(P5Q1)-L(setPxQx)
1192         .int       L(P6Q1)-L(setPxQx)
1193         .int       L(P7Q1)-L(setPxQx) 


1424         cmpq    $0x40, %rsi
1425         movq    %rax, (%rdi) 
1426         movq    %rax, 0x8(%rdi) 
1427         movq    %rax, 0x10(%rdi) 
1428         movq    %rax, 0x18(%rdi) 
1429         movq    %rax, 0x20(%rdi) 
1430         movq    %rax, 0x28(%rdi) 
1431         movq    %rax, 0x30(%rdi) 
1432         movq    %rax, 0x38(%rdi) 
1433         leaq    0x40(%rdi), %rdi
1434         jae     L(bzero_loop)
1435 
1436         /*
1437          * Clear any remaining bytes.
1438          */
1439 9:
1440         leaq    L(setPxQx)(%rip), %r10
1441         addq    %rsi, %rdi
1442         movslq  (%r10,%rsi,4), %rcx
1443         leaq    (%rcx,%r10,1), %r10
1444         jmpq    *%r10
1445 
1446         /*
1447          * Use rep sstoq. Clear any remainder via unrolled code
1448          */
1449         .p2align 4
1450 L(use_rep):
1451         movq    %rsi, %rcx              /* get size in bytes */
1452         shrq    $3, %rcx                /* count of 8-byte words to zero */
1453         rep
1454           sstoq                         /* %rcx = words to clear (%rax=0) */
1455         andq    $7, %rsi                /* remaining bytes */
1456         jnz     9b
1457         ret
1458 #undef  L
1459         SET_SIZE(bzero_altentry)
1460         SET_SIZE(bzero)
1461 
1462 #elif defined(__i386)
1463 
1464 #define ARG_ADDR        4


1560         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1561         jae     3f                      /* take copyop if uaddr > kernelbase */
1562         SMAP_DISABLE_INSTR(0)
1563         jmp     do_copy_fault           /* Takes care of leave for us */
1564 
1565 _copyin_err:
1566         SMAP_ENABLE_INSTR(2)
1567         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */  
1568         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1569 3:
1570         movq    T_COPYOPS(%r9), %rax
1571         cmpq    $0, %rax
1572         jz      2f
1573         /*
1574          * reload args for the copyop
1575          */
1576         movq    (%rsp), %rdi
1577         movq    0x8(%rsp), %rsi
1578         movq    0x10(%rsp), %rdx
1579         leave
1580         jmp     *CP_COPYIN(%rax)
1581 
1582 2:      movl    $-1, %eax       
1583         leave
1584         ret
1585         SET_SIZE(copyin)
1586 
1587 #elif defined(__i386)
1588 
1589 #define ARG_UADDR       4
1590 #define ARG_KADDR       8
1591 
1592         ENTRY(copyin)
1593         movl    kernelbase, %ecx
1594 #ifdef DEBUG
1595         cmpl    %ecx, ARG_KADDR(%esp)
1596         jnb     1f
1597         pushl   %ebp
1598         movl    %esp, %ebp
1599         pushl   $.copyin_panic_msg
1600         call    panic


1705          * indicated through an errno value in %rax and we iret from the
1706          * trap handler to here.
1707          */
1708 _xcopyin_err:
1709         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1710 _xcopyin_nta_err:
1711         SMAP_ENABLE_INSTR(3)
1712         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1713 3:
1714         movq    T_COPYOPS(%r9), %r8
1715         cmpq    $0, %r8
1716         jz      2f
1717 
1718         /*
1719          * reload args for the copyop
1720          */
1721         movq    (%rsp), %rdi
1722         movq    0x8(%rsp), %rsi
1723         movq    0x10(%rsp), %rdx
1724         leave
1725         jmp     *CP_XCOPYIN(%r8)
1726 
1727 2:      leave
1728         ret
1729         SET_SIZE(xcopyin_nta)
1730 
1731 #elif defined(__i386)
1732 
1733 #define ARG_UADDR       4
1734 #define ARG_KADDR       8
1735 #define ARG_COUNT       12
1736 #define ARG_CACHED      16
1737 
1738         .globl  use_sse_copy
1739 
1740         ENTRY(xcopyin_nta)
1741         movl    kernelbase, %ecx
1742         lea     _xcopyin_err, %eax
1743         movl    %gs:CPU_THREAD, %edx
1744         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1745         jae     4f


1848         jae     3f                      /* take copyop if uaddr > kernelbase */
1849         SMAP_DISABLE_INSTR(3)
1850         jmp     do_copy_fault           /* Calls leave for us */
1851 
1852 _copyout_err:
1853         SMAP_ENABLE_INSTR(4)
1854         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1855         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1856 3:
1857         movq    T_COPYOPS(%r9), %rax
1858         cmpq    $0, %rax
1859         jz      2f
1860 
1861         /*
1862          * reload args for the copyop
1863          */
1864         movq    (%rsp), %rdi
1865         movq    0x8(%rsp), %rsi
1866         movq    0x10(%rsp), %rdx
1867         leave
1868         jmp     *CP_COPYOUT(%rax)
1869 
1870 2:      movl    $-1, %eax
1871         leave
1872         ret
1873         SET_SIZE(copyout)
1874 
1875 #elif defined(__i386)
1876 
1877 #define ARG_KADDR       4
1878 #define ARG_UADDR       8
1879 
1880         ENTRY(copyout)
1881         movl    kernelbase, %ecx
1882 #ifdef DEBUG
1883         cmpl    %ecx, ARG_KADDR(%esp)
1884         jnb     1f
1885         pushl   %ebp
1886         movl    %esp, %ebp
1887         pushl   $.copyout_panic_msg
1888         call    panic


1993          * indicated through an errno value in %rax and we iret from the
1994          * trap handler to here.
1995          */
1996 _xcopyout_err:
1997         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1998 _xcopyout_nta_err:
1999         SMAP_ENABLE_INSTR(6)
2000         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2001 3:
2002         movq    T_COPYOPS(%r9), %r8
2003         cmpq    $0, %r8
2004         jz      2f
2005 
2006         /*
2007          * reload args for the copyop
2008          */
2009         movq    (%rsp), %rdi
2010         movq    0x8(%rsp), %rsi
2011         movq    0x10(%rsp), %rdx
2012         leave
2013         jmp     *CP_XCOPYOUT(%r8)
2014 
2015 2:      leave
2016         ret
2017         SET_SIZE(xcopyout_nta)
2018 
2019 #elif defined(__i386)
2020 
2021 #define ARG_KADDR       4
2022 #define ARG_UADDR       8
2023 #define ARG_COUNT       12
2024 #define ARG_CACHED      16
2025 
2026         ENTRY(xcopyout_nta)
2027         movl    kernelbase, %ecx
2028         lea     _xcopyout_err, %eax
2029         movl    %gs:CPU_THREAD, %edx
2030         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2031         jae     4f
2032 
2033         cmpl    $0, use_sse_copy        /* no sse support */


2307 4:
2308         movq    %gs:CPU_THREAD, %r9
2309         jmp     3f
2310 
2311 _copyinstr_error:
2312         SMAP_ENABLE_INSTR(8)
2313         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2314 3:
2315         movq    T_COPYOPS(%r9), %rax
2316         cmpq    $0, %rax
2317         jz      2f
2318 
2319         /*
2320          * reload args for the copyop
2321          */
2322         movq    (%rsp), %rdi
2323         movq    0x8(%rsp), %rsi
2324         movq    0x10(%rsp), %rdx
2325         movq    0x18(%rsp), %rcx
2326         leave
2327         jmp     *CP_COPYINSTR(%rax)
2328         
2329 2:      movl    $EFAULT, %eax           /* return EFAULT */
2330         leave
2331         ret
2332         SET_SIZE(copyinstr)
2333 
2334 #elif defined(__i386)
2335 
2336 #define ARG_UADDR       4
2337 #define ARG_KADDR       8
2338 
2339         ENTRY(copyinstr)
2340         movl    kernelbase, %ecx
2341 #ifdef DEBUG
2342         cmpl    %ecx, ARG_KADDR(%esp)
2343         jnb     1f
2344         pushl   %ebp
2345         movl    %esp, %ebp
2346         pushl   $.copyinstr_panic_msg
2347         call    panic


2429 4:
2430         movq    %gs:CPU_THREAD, %r9
2431         jmp     3f
2432 
2433 _copyoutstr_error:
2434         SMAP_ENABLE_INSTR(9)
2435         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2436 3:
2437         movq    T_COPYOPS(%r9), %rax
2438         cmpq    $0, %rax
2439         jz      2f
2440 
2441         /*
2442          * reload args for the copyop
2443          */
2444         movq    (%rsp), %rdi
2445         movq    0x8(%rsp), %rsi
2446         movq    0x10(%rsp), %rdx
2447         movq    0x18(%rsp), %rcx
2448         leave
2449         jmp     *CP_COPYOUTSTR(%rax)
2450         
2451 2:      movl    $EFAULT, %eax           /* return EFAULT */
2452         leave
2453         ret
2454         SET_SIZE(copyoutstr)    
2455         
2456 #elif defined(__i386)
2457 
2458 #define ARG_KADDR       4
2459 #define ARG_UADDR       8
2460 
2461         ENTRY(copyoutstr)
2462         movl    kernelbase, %ecx
2463 #ifdef DEBUG
2464         cmpl    %ecx, ARG_KADDR(%esp)
2465         jnb     1f
2466         pushl   %ebp
2467         movl    %esp, %ebp
2468         pushl   $.copyoutstr_panic_msg
2469         call    panic


2545         ENTRY(NAME)                             \
2546         movq    %gs:CPU_THREAD, %r9;            \
2547         cmpq    kernelbase(%rip), %rdi;         \
2548         jae     1f;                             \
2549         leaq    _flt_/**/NAME, %rdx;            \
2550         movq    %rdx, T_LOFAULT(%r9);           \
2551         SMAP_DISABLE_INSTR(DISNUM)              \
2552         INSTR   (%rdi), REG;                    \
2553         movq    $0, T_LOFAULT(%r9);             \
2554         INSTR   REG, (%rsi);                    \
2555         xorl    %eax, %eax;                     \
2556         SMAP_ENABLE_INSTR(EN1)                  \
2557         ret;                                    \
2558 _flt_/**/NAME:                                  \
2559         SMAP_ENABLE_INSTR(EN2)                  \
2560         movq    $0, T_LOFAULT(%r9);             \
2561 1:                                              \
2562         movq    T_COPYOPS(%r9), %rax;           \
2563         cmpq    $0, %rax;                       \
2564         jz      2f;                             \
2565         jmp     *COPYOP(%rax);                  \
2566 2:                                              \
2567         movl    $-1, %eax;                      \
2568         ret;                                    \
2569         SET_SIZE(NAME)
2570         
2571         FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572         FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573         FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574         FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2575 
2576 #elif defined(__i386)
2577 
2578 #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2579         ENTRY(NAME)                             \
2580         movl    %gs:CPU_THREAD, %ecx;           \
2581         movl    kernelbase, %eax;               \
2582         cmpl    %eax, 4(%esp);                  \
2583         jae     1f;                             \
2584         lea     _flt_/**/NAME, %edx;            \
2585         movl    %edx, T_LOFAULT(%ecx);          \


2654 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2655         ENTRY(NAME)                             \
2656         movq    %gs:CPU_THREAD, %r9;            \
2657         cmpq    kernelbase(%rip), %rdi;         \
2658         jae     1f;                             \
2659         leaq    _flt_/**/NAME, %rdx;            \
2660         SMAP_DISABLE_INSTR(DISNUM)              \
2661         movq    %rdx, T_LOFAULT(%r9);           \
2662         INSTR   REG, (%rdi);                    \
2663         movq    $0, T_LOFAULT(%r9);             \
2664         xorl    %eax, %eax;                     \
2665         SMAP_ENABLE_INSTR(EN1)                  \
2666         ret;                                    \
2667 _flt_/**/NAME:                                  \
2668         SMAP_ENABLE_INSTR(EN2)                  \
2669         movq    $0, T_LOFAULT(%r9);             \
2670 1:                                              \
2671         movq    T_COPYOPS(%r9), %rax;           \
2672         cmpq    $0, %rax;                       \
2673         jz      3f;                             \
2674         jmp     *COPYOP(%rax);                  \
2675 3:                                              \
2676         movl    $-1, %eax;                      \
2677         ret;                                    \
2678         SET_SIZE(NAME)
2679 
2680         SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681         SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682         SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683         SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2684 
2685 #elif defined(__i386)
2686 
2687 #define SUWORD(NAME, INSTR, REG, COPYOP)        \
2688         ENTRY(NAME)                             \
2689         movl    %gs:CPU_THREAD, %ecx;           \
2690         movl    kernelbase, %eax;               \
2691         cmpl    %eax, 4(%esp);                  \
2692         jae     1f;                             \
2693         lea     _flt_/**/NAME, %edx;            \
2694         movl    %edx, T_LOFAULT(%ecx);          \




  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright 2019 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #if defined(__lint)
  46 #include <sys/types.h>
  47 #include <sys/systm.h>
  48 #else   /* __lint */
  49 #include "assym.h"
  50 #endif  /* __lint */
  51 
  52 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  53 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  54 /*
  55  * Non-temporal access (NTA) alignment requirement
  56  */
  57 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  58 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  59 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
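
These constants gate the optimized paths: KCOPY_MIN_SIZE and XCOPY_MIN_SIZE
are minimum-size thresholds for the tuned kcopy/xcopy paths, while the NTA_*
and COUNT_ALIGN_SIZE values express the alignment the non-temporal ("nta")
variants such as xcopyin_nta and xcopyout_nta require before bypassing the
cache. As a hedged illustration of the kind of inner loop such constraints
exist for (this is not the routine from this file, just the generic
non-temporal store pattern):

        /* Copy %rcx bytes, assumed to be a multiple of 8, with
         * cache-bypassing stores; sfence orders them before return. */
1:      movq    (%rsi), %r8
        movnti  %r8, (%rdi)             /* non-temporal 8-byte store */
        addq    $8, %rsi
        addq    $8, %rdi
        subq    $8, %rcx
        jnz     1b
        sfence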


 465          * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
 466          * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
 467          * uses these registers in future they must be saved and restored.
 468          */
 469         ALTENTRY(bcopy_altentry)
 470 do_copy:
 471 #define L(s) .bcopy/**/s
 472         cmpq    $0x50, %rdx             /* 80 */
 473         jae     bcopy_ck_size
 474 
 475         /*
 476          * Performance data shows many callers copy small buffers. So for
 477          * best perf for these sizes unrolled code is used. Store data without
 478          * worrying about alignment.
 479          */
 480         leaq    L(fwdPxQx)(%rip), %r10
 481         addq    %rdx, %rdi
 482         addq    %rdx, %rsi
 483         movslq  (%r10,%rdx,4), %rcx
 484         leaq    (%rcx,%r10,1), %r10
 485         INDIRECT_JMP_REG(r10)
 486 
 487         .p2align 4
 488 L(fwdPxQx):
 489         .int       L(P0Q0)-L(fwdPxQx)   /* 0 */
 490         .int       L(P1Q0)-L(fwdPxQx)
 491         .int       L(P2Q0)-L(fwdPxQx)
 492         .int       L(P3Q0)-L(fwdPxQx)
 493         .int       L(P4Q0)-L(fwdPxQx)
 494         .int       L(P5Q0)-L(fwdPxQx)
 495         .int       L(P6Q0)-L(fwdPxQx)
 496         .int       L(P7Q0)-L(fwdPxQx)
 497 
 498         .int       L(P0Q1)-L(fwdPxQx)   /* 8 */
 499         .int       L(P1Q1)-L(fwdPxQx)
 500         .int       L(P2Q1)-L(fwdPxQx)
 501         .int       L(P3Q1)-L(fwdPxQx)
 502         .int       L(P4Q1)-L(fwdPxQx)
 503         .int       L(P5Q1)-L(fwdPxQx)
 504         .int       L(P6Q1)-L(fwdPxQx)
 505         .int       L(P7Q1)-L(fwdPxQx)
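
The hunk above is the small-copy fast path: the byte count indexes the
fwdPxQx table of 32-bit offsets, the selected offset is added back to the
table base in %r10, and control transfers into the matching unrolled copy
fragment (the source and destination pointers are pre-advanced by the count
so the fragments can address the data relative to the end of the region).
The only functional change is the dispatch itself, which now uses
INDIRECT_JMP_REG(r10) rather than "jmpq *%r10". As a hedged sketch -- the
macro presumably comes in via sys/asm_linkage.h, which this file already
includes, and its exact definition is assumed here rather than shown in this
webrev -- a retpoline build is expected to expand it to a direct jump to a
per-register thunk:

        /* Assumed expansion, for illustration only */
        #define INDIRECT_JMP_REG(reg)   jmp     __x86_indirect_thunk_/**/reg;

        INDIRECT_JMP_REG(r10)   /* becomes: jmp  __x86_indirect_thunk_r10 */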


 921         mov     0x28(%rdi), %rcx
 922         mov     %r10, 0x20(%rsi)
 923         mov     %rcx, 0x28(%rsi)
 924         mov     0x30(%rdi), %r8
 925         mov     0x38(%rdi), %r10
 926         lea     0x40(%rdi), %rdi
 927         mov     %r8, 0x30(%rsi)
 928         mov     %r10, 0x38(%rsi)
 929         lea     0x40(%rsi), %rsi
 930         jae     L(aligned_loop)
 931 
 932         /*
 933          * Copy remaining bytes (0-63)
 934          */
 935 L(do_remainder):
 936         leaq    L(fwdPxQx)(%rip), %r10
 937         addq    %rdx, %rdi
 938         addq    %rdx, %rsi
 939         movslq  (%r10,%rdx,4), %rcx
 940         leaq    (%rcx,%r10,1), %r10
 941         INDIRECT_JMP_REG(r10)
 942 
 943         /*
 944          * Use rep smovq. Clear remainder via unrolled code
 945          */
 946         .p2align 4
 947 L(use_rep):
 948         xchgq   %rdi, %rsi              /* %rsi = source, %rdi = destination */
 949         movq    %rdx, %rcx              /* %rcx = count */
 950         shrq    $3, %rcx                /* 8-byte word count */
 951         rep
 952           smovq
 953 
 954         xchgq   %rsi, %rdi              /* %rdi = src, %rsi = destination */
 955         andq    $7, %rdx                /* remainder */
 956         jnz     L(do_remainder)
 957         ret
 958 #undef  L
 959         SET_SIZE(bcopy_ck_size)
 960 
 961 #ifdef DEBUG
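
A note on the large-copy path above: "rep; smovq" is the historical
SysV/AT&T mnemonic for "rep movsq", copying %rcx eight-byte words from
(%rsi) to (%rdi) and advancing both pointers, after which the 0-7 leftover
bytes are finished by jumping back into the unrolled fragments. The same
step in contemporary mnemonics, for illustration only:

        /* Equivalent core in current mnemonics; the surrounding xchgq of
         * %rdi/%rsi in the real routine is elided here. */
        movq    %rdx, %rcx              /* byte count */
        shrq    $3, %rcx                /* -> number of 8-byte words */
        rep movsq                       /* copy 8 bytes per iteration */
        andq    $7, %rdx                /* 0-7 tail bytes remain */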


1153         jmp     call_panic              /* setup stack and call panic */
1154 0:
1155 #endif
1156         ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159         xorl    %eax, %eax
1160 
1161         cmpq    $0x50, %rsi             /* 80 */
1162         jae     L(ck_align)
1163 
1164         /*
1165          * Performance data shows many callers are zeroing small buffers. So
1166          * for best perf for these sizes unrolled code is used. Store zeros
1167          * without worrying about alignment.
1168          */
1169         leaq    L(setPxQx)(%rip), %r10
1170         addq    %rsi, %rdi
1171         movslq  (%r10,%rsi,4), %rcx
1172         leaq    (%rcx,%r10,1), %r10
1173         INDIRECT_JMP_REG(r10)
1174 
1175         .p2align 4
1176 L(setPxQx):
1177         .int       L(P0Q0)-L(setPxQx)   /* 0 */
1178         .int       L(P1Q0)-L(setPxQx)
1179         .int       L(P2Q0)-L(setPxQx)
1180         .int       L(P3Q0)-L(setPxQx)
1181         .int       L(P4Q0)-L(setPxQx)
1182         .int       L(P5Q0)-L(setPxQx)
1183         .int       L(P6Q0)-L(setPxQx)
1184         .int       L(P7Q0)-L(setPxQx)
1185 
1186         .int       L(P0Q1)-L(setPxQx)   /* 8 */
1187         .int       L(P1Q1)-L(setPxQx)
1188         .int       L(P2Q1)-L(setPxQx)
1189         .int       L(P3Q1)-L(setPxQx)
1190         .int       L(P4Q1)-L(setPxQx)
1191         .int       L(P5Q1)-L(setPxQx)
1192         .int       L(P6Q1)-L(setPxQx)
1193         .int       L(P7Q1)-L(setPxQx)


1424         cmpq    $0x40, %rsi
1425         movq    %rax, (%rdi)
1426         movq    %rax, 0x8(%rdi)
1427         movq    %rax, 0x10(%rdi)
1428         movq    %rax, 0x18(%rdi)
1429         movq    %rax, 0x20(%rdi)
1430         movq    %rax, 0x28(%rdi)
1431         movq    %rax, 0x30(%rdi)
1432         movq    %rax, 0x38(%rdi)
1433         leaq    0x40(%rdi), %rdi
1434         jae     L(bzero_loop)
1435 
1436         /*
1437          * Clear any remaining bytes.
1438          */
1439 9:
1440         leaq    L(setPxQx)(%rip), %r10
1441         addq    %rsi, %rdi
1442         movslq  (%r10,%rsi,4), %rcx
1443         leaq    (%rcx,%r10,1), %r10
1444         INDIRECT_JMP_REG(r10)
1445 
1446         /*
1447          * Use rep sstoq. Clear any remainder via unrolled code
1448          */
1449         .p2align 4
1450 L(use_rep):
1451         movq    %rsi, %rcx              /* get size in bytes */
1452         shrq    $3, %rcx                /* count of 8-byte words to zero */
1453         rep
1454           sstoq                         /* %rcx = words to clear (%rax=0) */
1455         andq    $7, %rsi                /* remaining bytes */
1456         jnz     9b
1457         ret
1458 #undef  L
1459         SET_SIZE(bzero_altentry)
1460         SET_SIZE(bzero)
1461 
1462 #elif defined(__i386)
1463 
1464 #define ARG_ADDR        4


1560         cmpq    %rax, %rdi              /* test uaddr < kernelbase */
1561         jae     3f                      /* take copyop if uaddr > kernelbase */
1562         SMAP_DISABLE_INSTR(0)
1563         jmp     do_copy_fault           /* Takes care of leave for us */
1564 
1565 _copyin_err:
1566         SMAP_ENABLE_INSTR(2)
1567         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1568         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1569 3:
1570         movq    T_COPYOPS(%r9), %rax
1571         cmpq    $0, %rax
1572         jz      2f
1573         /*
1574          * reload args for the copyop
1575          */
1576         movq    (%rsp), %rdi
1577         movq    0x8(%rsp), %rsi
1578         movq    0x10(%rsp), %rdx
1579         leave
1580         movq    CP_COPYIN(%rax), %rax
1581         INDIRECT_JMP_REG(rax)
1582 
1583 2:      movl    $-1, %eax
1584         leave
1585         ret
1586         SET_SIZE(copyin)
1587 
1588 #elif defined(__i386)
1589 
1590 #define ARG_UADDR       4
1591 #define ARG_KADDR       8
1592 
1593         ENTRY(copyin)
1594         movl    kernelbase, %ecx
1595 #ifdef DEBUG
1596         cmpl    %ecx, ARG_KADDR(%esp)
1597         jnb     1f
1598         pushl   %ebp
1599         movl    %esp, %ebp
1600         pushl   $.copyin_panic_msg
1601         call    panic
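
The amd64 copyin hunk above shows the error/fallback structure shared by all
of these routines: the user address is checked against kernelbase, a lofault
handler is armed in the current thread so that a fault during the copy lands
at _copyin_err, and on failure the original lofault is restored and the
routine tail-calls the per-thread copyops vector if one is installed. That
tail call is the other place a bare indirect jump used to live; it now loads
the target and dispatches through the macro. The pattern, repeated with
CP_XCOPYIN, CP_COPYOUT, CP_XCOPYOUT, CP_COPYINSTR and CP_COPYOUTSTR in the
later hunks (offsets come from assym.h), pulled together here for reference:

        movq    T_COPYOPS(%r9), %rax    /* per-thread copyops vector, or 0 */
        cmpq    $0, %rax
        jz      2f                      /* none installed: return failure */
        movq    (%rsp), %rdi            /* reload the saved arguments */
        movq    0x8(%rsp), %rsi
        movq    0x10(%rsp), %rdx
        leave
        movq    CP_COPYIN(%rax), %rax   /* replacement copyin routine */
        INDIRECT_JMP_REG(rax)           /* was: jmp *CP_COPYIN(%rax) */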


1706          * indicated through an errno value in %rax and we iret from the
1707          * trap handler to here.
1708          */
1709 _xcopyin_err:
1710         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1711 _xcopyin_nta_err:
1712         SMAP_ENABLE_INSTR(3)
1713         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1714 3:
1715         movq    T_COPYOPS(%r9), %r8
1716         cmpq    $0, %r8
1717         jz      2f
1718 
1719         /*
1720          * reload args for the copyop
1721          */
1722         movq    (%rsp), %rdi
1723         movq    0x8(%rsp), %rsi
1724         movq    0x10(%rsp), %rdx
1725         leave
1726         movq    CP_XCOPYIN(%r8), %r8
1727         INDIRECT_JMP_REG(r8)
1728 
1729 2:      leave
1730         ret
1731         SET_SIZE(xcopyin_nta)
1732 
1733 #elif defined(__i386)
1734 
1735 #define ARG_UADDR       4
1736 #define ARG_KADDR       8
1737 #define ARG_COUNT       12
1738 #define ARG_CACHED      16
1739 
1740         .globl  use_sse_copy
1741 
1742         ENTRY(xcopyin_nta)
1743         movl    kernelbase, %ecx
1744         lea     _xcopyin_err, %eax
1745         movl    %gs:CPU_THREAD, %edx
1746         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
1747         jae     4f


1850         jae     3f                      /* take copyop if uaddr > kernelbase */
1851         SMAP_DISABLE_INSTR(3)
1852         jmp     do_copy_fault           /* Calls leave for us */
1853 
1854 _copyout_err:
1855         SMAP_ENABLE_INSTR(4)
1856         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
1857         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
1858 3:
1859         movq    T_COPYOPS(%r9), %rax
1860         cmpq    $0, %rax
1861         jz      2f
1862 
1863         /*
1864          * reload args for the copyop
1865          */
1866         movq    (%rsp), %rdi
1867         movq    0x8(%rsp), %rsi
1868         movq    0x10(%rsp), %rdx
1869         leave
1870         movq    CP_COPYOUT(%rax), %rax
1871         INDIRECT_JMP_REG(rax)
1872 
1873 2:      movl    $-1, %eax
1874         leave
1875         ret
1876         SET_SIZE(copyout)
1877 
1878 #elif defined(__i386)
1879 
1880 #define ARG_KADDR       4
1881 #define ARG_UADDR       8
1882 
1883         ENTRY(copyout)
1884         movl    kernelbase, %ecx
1885 #ifdef DEBUG
1886         cmpl    %ecx, ARG_KADDR(%esp)
1887         jnb     1f
1888         pushl   %ebp
1889         movl    %esp, %ebp
1890         pushl   $.copyout_panic_msg
1891         call    panic


1996          * indicated through an errno value in %rax and we iret from the
1997          * trap handler to here.
1998          */
1999 _xcopyout_err:
2000         addq    $8, %rsp                /* pop bcopy_altentry call ret addr */
2001 _xcopyout_nta_err:
2002         SMAP_ENABLE_INSTR(6)
2003         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2004 3:
2005         movq    T_COPYOPS(%r9), %r8
2006         cmpq    $0, %r8
2007         jz      2f
2008 
2009         /*
2010          * reload args for the copyop
2011          */
2012         movq    (%rsp), %rdi
2013         movq    0x8(%rsp), %rsi
2014         movq    0x10(%rsp), %rdx
2015         leave
2016         movq    CP_XCOPYOUT(%r8), %r8
2017         INDIRECT_JMP_REG(r8)
2018 
2019 2:      leave
2020         ret
2021         SET_SIZE(xcopyout_nta)
2022 
2023 #elif defined(__i386)
2024 
2025 #define ARG_KADDR       4
2026 #define ARG_UADDR       8
2027 #define ARG_COUNT       12
2028 #define ARG_CACHED      16
2029 
2030         ENTRY(xcopyout_nta)
2031         movl    kernelbase, %ecx
2032         lea     _xcopyout_err, %eax
2033         movl    %gs:CPU_THREAD, %edx
2034         cmpl    %ecx, ARG_UADDR(%esp)   /* test uaddr < kernelbase */
2035         jae     4f
2036 
2037         cmpl    $0, use_sse_copy        /* no sse support */


2311 4:
2312         movq    %gs:CPU_THREAD, %r9
2313         jmp     3f
2314 
2315 _copyinstr_error:
2316         SMAP_ENABLE_INSTR(8)
2317         movq    %r11, T_LOFAULT(%r9)    /* restore original lofault */
2318 3:
2319         movq    T_COPYOPS(%r9), %rax
2320         cmpq    $0, %rax
2321         jz      2f
2322 
2323         /*
2324          * reload args for the copyop
2325          */
2326         movq    (%rsp), %rdi
2327         movq    0x8(%rsp), %rsi
2328         movq    0x10(%rsp), %rdx
2329         movq    0x18(%rsp), %rcx
2330         leave
2331         movq    CP_COPYINSTR(%rax), %rax
2332         INDIRECT_JMP_REG(rax)
2333 
2334 2:      movl    $EFAULT, %eax           /* return EFAULT */
2335         leave
2336         ret
2337         SET_SIZE(copyinstr)
2338 
2339 #elif defined(__i386)
2340 
2341 #define ARG_UADDR       4
2342 #define ARG_KADDR       8
2343 
2344         ENTRY(copyinstr)
2345         movl    kernelbase, %ecx
2346 #ifdef DEBUG
2347         cmpl    %ecx, ARG_KADDR(%esp)
2348         jnb     1f
2349         pushl   %ebp
2350         movl    %esp, %ebp
2351         pushl   $.copyinstr_panic_msg
2352         call    panic


2434 4:
2435         movq    %gs:CPU_THREAD, %r9
2436         jmp     3f
2437 
2438 _copyoutstr_error:
2439         SMAP_ENABLE_INSTR(9)
2440         movq    %r11, T_LOFAULT(%r9)    /* restore the original lofault */
2441 3:
2442         movq    T_COPYOPS(%r9), %rax
2443         cmpq    $0, %rax
2444         jz      2f
2445 
2446         /*
2447          * reload args for the copyop
2448          */
2449         movq    (%rsp), %rdi
2450         movq    0x8(%rsp), %rsi
2451         movq    0x10(%rsp), %rdx
2452         movq    0x18(%rsp), %rcx
2453         leave
2454         movq    CP_COPYOUTSTR(%rax), %rax
2455         INDIRECT_JMP_REG(rax)
2456 
2457 2:      movl    $EFAULT, %eax           /* return EFAULT */
2458         leave
2459         ret
2460         SET_SIZE(copyoutstr)
2461 
2462 #elif defined(__i386)
2463 
2464 #define ARG_KADDR       4
2465 #define ARG_UADDR       8
2466 
2467         ENTRY(copyoutstr)
2468         movl    kernelbase, %ecx
2469 #ifdef DEBUG
2470         cmpl    %ecx, ARG_KADDR(%esp)
2471         jnb     1f
2472         pushl   %ebp
2473         movl    %esp, %ebp
2474         pushl   $.copyoutstr_panic_msg
2475         call    panic


2551         ENTRY(NAME)                             \
2552         movq    %gs:CPU_THREAD, %r9;            \
2553         cmpq    kernelbase(%rip), %rdi;         \
2554         jae     1f;                             \
2555         leaq    _flt_/**/NAME, %rdx;            \
2556         movq    %rdx, T_LOFAULT(%r9);           \
2557         SMAP_DISABLE_INSTR(DISNUM)              \
2558         INSTR   (%rdi), REG;                    \
2559         movq    $0, T_LOFAULT(%r9);             \
2560         INSTR   REG, (%rsi);                    \
2561         xorl    %eax, %eax;                     \
2562         SMAP_ENABLE_INSTR(EN1)                  \
2563         ret;                                    \
2564 _flt_/**/NAME:                                  \
2565         SMAP_ENABLE_INSTR(EN2)                  \
2566         movq    $0, T_LOFAULT(%r9);             \
2567 1:                                              \
2568         movq    T_COPYOPS(%r9), %rax;           \
2569         cmpq    $0, %rax;                       \
2570         jz      2f;                             \
2571         movq    COPYOP(%rax), %rax;             \
2572         INDIRECT_JMP_REG(rax);                  \
2573 2:                                              \
2574         movl    $-1, %eax;                      \
2575         ret;                                    \
2576         SET_SIZE(NAME)
2577 
2578         FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2579         FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2580         FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2581         FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2582 
2583 #elif defined(__i386)
2584 
2585 #define FUWORD(NAME, INSTR, REG, COPYOP)        \
2586         ENTRY(NAME)                             \
2587         movl    %gs:CPU_THREAD, %ecx;           \
2588         movl    kernelbase, %eax;               \
2589         cmpl    %eax, 4(%esp);                  \
2590         jae     1f;                             \
2591         lea     _flt_/**/NAME, %edx;            \
2592         movl    %edx, T_LOFAULT(%ecx);          \
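
The FUWORD (above) and SUWORD (below) macros stamp out the fuword*/suword*
primitives: fetch or store one naturally-sized word at a user address under
lofault protection, return 0 on success and -1 on fault, and fall back to
the per-thread copyops vector when one is present. Their fallback held the
last remaining bare "jmp *COPYOP(%rax)"; it now goes through the register
thunk as well. An illustrative, simplified expansion of the amd64
FUWORD(fuword8, movb, %al, CP_FUWORD8, ...) instance, with the SMAP
enable/disable instrumentation elided:

        ENTRY(fuword8)
        movq    %gs:CPU_THREAD, %r9     /* curthread */
        cmpq    kernelbase(%rip), %rdi  /* kernel address: use copyops path */
        jae     1f
        leaq    _flt_fuword8, %rdx
        movq    %rdx, T_LOFAULT(%r9)    /* arm lofault around the user load */
        movb    (%rdi), %al             /* fetch the byte from user space */
        movq    $0, T_LOFAULT(%r9)      /* disarm */
        movb    %al, (%rsi)             /* hand the value back to the caller */
        xorl    %eax, %eax              /* success */
        ret
_flt_fuword8:
        movq    $0, T_LOFAULT(%r9)
1:      movq    T_COPYOPS(%r9), %rax
        cmpq    $0, %rax
        jz      2f
        movq    CP_FUWORD8(%rax), %rax  /* COPYOP slot for this primitive */
        INDIRECT_JMP_REG(rax)           /* was: jmp *CP_FUWORD8(%rax) */
2:      movl    $-1, %eax
        ret
        SET_SIZE(fuword8)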


2661 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)      \
2662         ENTRY(NAME)                             \
2663         movq    %gs:CPU_THREAD, %r9;            \
2664         cmpq    kernelbase(%rip), %rdi;         \
2665         jae     1f;                             \
2666         leaq    _flt_/**/NAME, %rdx;            \
2667         SMAP_DISABLE_INSTR(DISNUM)              \
2668         movq    %rdx, T_LOFAULT(%r9);           \
2669         INSTR   REG, (%rdi);                    \
2670         movq    $0, T_LOFAULT(%r9);             \
2671         xorl    %eax, %eax;                     \
2672         SMAP_ENABLE_INSTR(EN1)                  \
2673         ret;                                    \
2674 _flt_/**/NAME:                                  \
2675         SMAP_ENABLE_INSTR(EN2)                  \
2676         movq    $0, T_LOFAULT(%r9);             \
2677 1:                                              \
2678         movq    T_COPYOPS(%r9), %rax;           \
2679         cmpq    $0, %rax;                       \
2680         jz      3f;                             \
2681         movq    COPYOP(%rax), %rax;             \
2682         INDIRECT_JMP_REG(rax);                  \
2683 3:                                              \
2684         movl    $-1, %eax;                      \
2685         ret;                                    \
2686         SET_SIZE(NAME)
2687 
2688         SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2689         SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2690         SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2691         SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2692 
2693 #elif defined(__i386)
2694 
2695 #define SUWORD(NAME, INSTR, REG, COPYOP)        \
2696         ENTRY(NAME)                             \
2697         movl    %gs:CPU_THREAD, %ecx;           \
2698         movl    kernelbase, %eax;               \
2699         cmpl    %eax, 4(%esp);                  \
2700         jae     1f;                             \
2701         lea     _flt_/**/NAME, %edx;            \
2702         movl    %edx, T_LOFAULT(%ecx);          \