19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2009, Intel Corporation
28 * All rights reserved.
29 */
30
31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 /* All Rights Reserved */
34
35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 /* All Rights Reserved */
37
38 /*
39 * Copyright (c) 2018 Joyent, Inc.
40 */
41
42 #include <sys/errno.h>
43 #include <sys/asm_linkage.h>
44
45 #if defined(__lint)
46 #include <sys/types.h>
47 #include <sys/systm.h>
48 #else /* __lint */
49 #include "assym.h"
50 #endif /* __lint */
51
52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 /*
55 * Non-temporal access (NTA) alignment requirement
56 */
57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
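/*
 * (Editorial note on the mask arithmetic above: NTA_ALIGN_MASK is
 * NTA_ALIGN_SIZE - 1 = 0x3, so "addr & NTA_ALIGN_MASK" is zero exactly
 * when addr meets the 4-byte NTA alignment requirement.)
 */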
465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 * uses these registers in the future, they must be saved and restored.
468 */
469 ALTENTRY(bcopy_altentry)
470 do_copy:
471 #define L(s) .bcopy/**/s
472 cmpq $0x50, %rdx /* 80 */
473 jae bcopy_ck_size
474
475 /*
476 * Performance data shows many callers copy small buffers, so for the
477 * best performance at these sizes, unrolled code is used. Store data
478 * without worrying about alignment.
479 */
480 leaq L(fwdPxQx)(%rip), %r10
481 addq %rdx, %rdi
482 addq %rdx, %rsi
483 movslq (%r10,%rdx,4), %rcx
484 leaq (%rcx,%r10,1), %r10
485 jmpq *%r10
486
487 .p2align 4
488 L(fwdPxQx):
489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 .int L(P1Q0)-L(fwdPxQx)
491 .int L(P2Q0)-L(fwdPxQx)
492 .int L(P3Q0)-L(fwdPxQx)
493 .int L(P4Q0)-L(fwdPxQx)
494 .int L(P5Q0)-L(fwdPxQx)
495 .int L(P6Q0)-L(fwdPxQx)
496 .int L(P7Q0)-L(fwdPxQx)
497
498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 .int L(P1Q1)-L(fwdPxQx)
500 .int L(P2Q1)-L(fwdPxQx)
501 .int L(P3Q1)-L(fwdPxQx)
502 .int L(P4Q1)-L(fwdPxQx)
503 .int L(P5Q1)-L(fwdPxQx)
504 .int L(P6Q1)-L(fwdPxQx)
505 .int L(P7Q1)-L(fwdPxQx)
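/*
 * Editorial sketch (not part of the original source): the fwdPxQx table
 * above holds 32-bit offsets from the table base to unrolled fragments
 * that copy exactly N bytes, and the dispatch code biases both pointers
 * by the count and then jumps through entry [count].  The hedged C
 * analogue below uses plain function pointers instead of relative
 * offsets (the assembly uses offsets so the table is position
 * independent and each entry is only 4 bytes); all names are
 * hypothetical.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	typedef void (*frag_t)(const char *, char *);
 *
 *	// Unrolled fragments: each copies exactly N bytes ending at the
 *	// already-biased pointers, like the PxQy labels do.
 *	static void copy0(const char *s, char *d) { (void)s; (void)d; }
 *	static void copy1(const char *s, char *d) { d[-1] = s[-1]; }
 *	static void copy2(const char *s, char *d) { memcpy(d - 2, s - 2, 2); }
 *
 *	static const frag_t frag[] = { copy0, copy1, copy2 };
 *
 *	static void
 *	small_copy(const void *from, void *to, size_t cnt)
 *	{
 *		// addq %rdx, %rdi; addq %rdx, %rsi; jump via table[cnt]
 *		frag[cnt]((const char *)from + cnt, (char *)to + cnt);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		char src[] = "hi", dst[3] = { 0 };
 *
 *		small_copy(src, dst, 2);
 *		(void) printf("%s\n", dst);	// prints "hi"
 *		return (0);
 *	}
 */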
921 mov 0x28(%rdi), %rcx
922 mov %r10, 0x20(%rsi)
923 mov %rcx, 0x28(%rsi)
924 mov 0x30(%rdi), %r8
925 mov 0x38(%rdi), %r10
926 lea 0x40(%rdi), %rdi
927 mov %r8, 0x30(%rsi)
928 mov %r10, 0x38(%rsi)
929 lea 0x40(%rsi), %rsi
930 jae L(aligned_loop)
931
932 /*
933 * Copy remaining bytes (0-63)
934 */
935 L(do_remainder):
936 leaq L(fwdPxQx)(%rip), %r10
937 addq %rdx, %rdi
938 addq %rdx, %rsi
939 movslq (%r10,%rdx,4), %rcx
940 leaq (%rcx,%r10,1), %r10
941 jmpq *%r10
942
943 /*
944 * Use rep smovq. Copy the remainder via unrolled code
945 */
946 .p2align 4
947 L(use_rep):
948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 movq %rdx, %rcx /* %rcx = count */
950 shrq $3, %rcx /* 8-byte word count */
951 rep
952 smovq
953
954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 andq $7, %rdx /* remainder */
956 jnz L(do_remainder)
957 ret
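/*
 * Editorial sketch (not part of the original source): the L(use_rep)
 * path above swaps %rdi/%rsi because "rep smovq" (rep movsq) wants the
 * source in %rsi and the destination in %rdi, copies count / 8 eight-byte
 * words, and then sends the 0-7 leftover bytes back through the unrolled
 * table.  A hedged C analogue of that split, with hypothetical names:
 *
 *	#include <string.h>
 *
 *	static void
 *	word_copy(const void *from, void *to, size_t cnt)
 *	{
 *		const char *s = from;
 *		char *d = to;
 *		size_t words = cnt >> 3;	// shrq $3, %rcx
 *
 *		for (; words != 0; words--, s += 8, d += 8)
 *			memcpy(d, s, 8);	// rep smovq
 *		if ((cnt & 7) != 0)		// andq $7, %rdx; jnz ...
 *			memcpy(d, s, cnt & 7);	// L(do_remainder)
 *	}
 */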
958 #undef L
959 SET_SIZE(bcopy_ck_size)
960
961 #ifdef DEBUG
1153 jmp call_panic /* setup stack and call panic */
1154 0:
1155 #endif
1156 ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159 xorl %eax, %eax
1160
1161 cmpq $0x50, %rsi /* 80 */
1162 jae L(ck_align)
1163
1164 /*
1165 * Performance data shows many callers zero small buffers, so for the
1166 * best performance at these sizes, unrolled code is used. Store zeros
1167 * without worrying about alignment.
1168 */
1169 leaq L(setPxQx)(%rip), %r10
1170 addq %rsi, %rdi
1171 movslq (%r10,%rsi,4), %rcx
1172 leaq (%rcx,%r10,1), %r10
1173 jmpq *%r10
1174
1175 .p2align 4
1176 L(setPxQx):
1177 .int L(P0Q0)-L(setPxQx) /* 0 */
1178 .int L(P1Q0)-L(setPxQx)
1179 .int L(P2Q0)-L(setPxQx)
1180 .int L(P3Q0)-L(setPxQx)
1181 .int L(P4Q0)-L(setPxQx)
1182 .int L(P5Q0)-L(setPxQx)
1183 .int L(P6Q0)-L(setPxQx)
1184 .int L(P7Q0)-L(setPxQx)
1185
1186 .int L(P0Q1)-L(setPxQx) /* 8 */
1187 .int L(P1Q1)-L(setPxQx)
1188 .int L(P2Q1)-L(setPxQx)
1189 .int L(P3Q1)-L(setPxQx)
1190 .int L(P4Q1)-L(setPxQx)
1191 .int L(P5Q1)-L(setPxQx)
1192 .int L(P6Q1)-L(setPxQx)
1193 .int L(P7Q1)-L(setPxQx)
1424 cmpq $0x40, %rsi
1425 movq %rax, (%rdi)
1426 movq %rax, 0x8(%rdi)
1427 movq %rax, 0x10(%rdi)
1428 movq %rax, 0x18(%rdi)
1429 movq %rax, 0x20(%rdi)
1430 movq %rax, 0x28(%rdi)
1431 movq %rax, 0x30(%rdi)
1432 movq %rax, 0x38(%rdi)
1433 leaq 0x40(%rdi), %rdi
1434 jae L(bzero_loop)
1435
1436 /*
1437 * Clear any remaining bytes.
1438 */
1439 9:
1440 leaq L(setPxQx)(%rip), %r10
1441 addq %rsi, %rdi
1442 movslq (%r10,%rsi,4), %rcx
1443 leaq (%rcx,%r10,1), %r10
1444 jmpq *%r10
1445
1446 /*
1447 * Use rep sstoq. Clear any remainder via unrolled code
1448 */
1449 .p2align 4
1450 L(use_rep):
1451 movq %rsi, %rcx /* get size in bytes */
1452 shrq $3, %rcx /* count of 8-byte words to zero */
1453 rep
1454 sstoq /* %rcx = words to clear (%rax=0) */
1455 andq $7, %rsi /* remaining bytes */
1456 jnz 9b
1457 ret
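/*
 * (Editorial note: a quick worked example of the split above: if %rsi
 * held 75 at this point, "shrq $3" would leave %rcx = 9, so rep sstoq
 * clears 72 bytes as eight-byte words, and "andq $7" leaves %rsi = 3,
 * which is finished through the setPxQx unrolled path at label 9.)
 */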
1458 #undef L
1459 SET_SIZE(bzero_altentry)
1460 SET_SIZE(bzero)
1461
1462 #elif defined(__i386)
1463
1464 #define ARG_ADDR 4
1560 cmpq %rax, %rdi /* test uaddr < kernelbase */
1561 jae 3f /* take copyop if uaddr >= kernelbase */
1562 SMAP_DISABLE_INSTR(0)
1563 jmp do_copy_fault /* Takes care of leave for us */
1564
1565 _copyin_err:
1566 SMAP_ENABLE_INSTR(2)
1567 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1569 3:
1570 movq T_COPYOPS(%r9), %rax
1571 cmpq $0, %rax
1572 jz 2f
1573 /*
1574 * reload args for the copyop
1575 */
1576 movq (%rsp), %rdi
1577 movq 0x8(%rsp), %rsi
1578 movq 0x10(%rsp), %rdx
1579 leave
1580 jmp *CP_COPYIN(%rax)
1581
1582 2: movl $-1, %eax
1583 leave
1584 ret
1585 SET_SIZE(copyin)
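/*
 * Editorial sketch (not part of the original source): the error and
 * out-of-range paths above fall back to the thread's "copyops" vector.
 * A hedged C picture of that dispatch, with minimal stand-in types
 * mirroring the T_COPYOPS/CP_COPYIN offsets (the real definitions live
 * in the kernel headers):
 *
 *	#include <stddef.h>
 *
 *	typedef struct copyops {
 *		int (*cp_copyin)(const void *, void *, size_t);
 *	} copyops_t;
 *
 *	typedef struct kthread {
 *		copyops_t *t_copyops;
 *	} kthread_t;
 *
 *	static int
 *	copyin_fallback(kthread_t *t, const void *uaddr, void *kaddr,
 *	    size_t count)
 *	{
 *		// Mirrors label 3: use the installed copyop if any,
 *		// otherwise report failure with -1 (label 2).
 *		if (t->t_copyops != NULL)
 *			return (t->t_copyops->cp_copyin(uaddr, kaddr, count));
 *		return (-1);
 *	}
 */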
1586
1587 #elif defined(__i386)
1588
1589 #define ARG_UADDR 4
1590 #define ARG_KADDR 8
1591
1592 ENTRY(copyin)
1593 movl kernelbase, %ecx
1594 #ifdef DEBUG
1595 cmpl %ecx, ARG_KADDR(%esp)
1596 jnb 1f
1597 pushl %ebp
1598 movl %esp, %ebp
1599 pushl $.copyin_panic_msg
1600 call panic
1705 * indicated through an errno value in %rax and we iret from the
1706 * trap handler to here.
1707 */
1708 _xcopyin_err:
1709 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1710 _xcopyin_nta_err:
1711 SMAP_ENABLE_INSTR(3)
1712 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1713 3:
1714 movq T_COPYOPS(%r9), %r8
1715 cmpq $0, %r8
1716 jz 2f
1717
1718 /*
1719 * reload args for the copyop
1720 */
1721 movq (%rsp), %rdi
1722 movq 0x8(%rsp), %rsi
1723 movq 0x10(%rsp), %rdx
1724 leave
1725 jmp *CP_XCOPYIN(%r8)
1726
1727 2: leave
1728 ret
1729 SET_SIZE(xcopyin_nta)
1730
1731 #elif defined(__i386)
1732
1733 #define ARG_UADDR 4
1734 #define ARG_KADDR 8
1735 #define ARG_COUNT 12
1736 #define ARG_CACHED 16
1737
1738 .globl use_sse_copy
1739
1740 ENTRY(xcopyin_nta)
1741 movl kernelbase, %ecx
1742 lea _xcopyin_err, %eax
1743 movl %gs:CPU_THREAD, %edx
1744 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1745 jae 4f
1848 jae 3f /* take copyop if uaddr >= kernelbase */
1849 SMAP_DISABLE_INSTR(3)
1850 jmp do_copy_fault /* Calls leave for us */
1851
1852 _copyout_err:
1853 SMAP_ENABLE_INSTR(4)
1854 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1855 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1856 3:
1857 movq T_COPYOPS(%r9), %rax
1858 cmpq $0, %rax
1859 jz 2f
1860
1861 /*
1862 * reload args for the copyop
1863 */
1864 movq (%rsp), %rdi
1865 movq 0x8(%rsp), %rsi
1866 movq 0x10(%rsp), %rdx
1867 leave
1868 jmp *CP_COPYOUT(%rax)
1869
1870 2: movl $-1, %eax
1871 leave
1872 ret
1873 SET_SIZE(copyout)
1874
1875 #elif defined(__i386)
1876
1877 #define ARG_KADDR 4
1878 #define ARG_UADDR 8
1879
1880 ENTRY(copyout)
1881 movl kernelbase, %ecx
1882 #ifdef DEBUG
1883 cmpl %ecx, ARG_KADDR(%esp)
1884 jnb 1f
1885 pushl %ebp
1886 movl %esp, %ebp
1887 pushl $.copyout_panic_msg
1888 call panic
1993 * indicated through an errno value in %rax and we iret from the
1994 * trap handler to here.
1995 */
1996 _xcopyout_err:
1997 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1998 _xcopyout_nta_err:
1999 SMAP_ENABLE_INSTR(6)
2000 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2001 3:
2002 movq T_COPYOPS(%r9), %r8
2003 cmpq $0, %r8
2004 jz 2f
2005
2006 /*
2007 * reload args for the copyop
2008 */
2009 movq (%rsp), %rdi
2010 movq 0x8(%rsp), %rsi
2011 movq 0x10(%rsp), %rdx
2012 leave
2013 jmp *CP_XCOPYOUT(%r8)
2014
2015 2: leave
2016 ret
2017 SET_SIZE(xcopyout_nta)
2018
2019 #elif defined(__i386)
2020
2021 #define ARG_KADDR 4
2022 #define ARG_UADDR 8
2023 #define ARG_COUNT 12
2024 #define ARG_CACHED 16
2025
2026 ENTRY(xcopyout_nta)
2027 movl kernelbase, %ecx
2028 lea _xcopyout_err, %eax
2029 movl %gs:CPU_THREAD, %edx
2030 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2031 jae 4f
2032
2033 cmpl $0, use_sse_copy /* no sse support */
2307 4:
2308 movq %gs:CPU_THREAD, %r9
2309 jmp 3f
2310
2311 _copyinstr_error:
2312 SMAP_ENABLE_INSTR(8)
2313 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2314 3:
2315 movq T_COPYOPS(%r9), %rax
2316 cmpq $0, %rax
2317 jz 2f
2318
2319 /*
2320 * reload args for the copyop
2321 */
2322 movq (%rsp), %rdi
2323 movq 0x8(%rsp), %rsi
2324 movq 0x10(%rsp), %rdx
2325 movq 0x18(%rsp), %rcx
2326 leave
2327 jmp *CP_COPYINSTR(%rax)
2328
2329 2: movl $EFAULT, %eax /* return EFAULT */
2330 leave
2331 ret
2332 SET_SIZE(copyinstr)
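/*
 * (Editorial note: unlike copyin(), copyinstr() takes four arguments
 * (uaddr, kaddr, maxlength, lencopied), so the fallback above also
 * reloads %rcx before jumping to the copyop, and a failure with no
 * copyops installed reports EFAULT rather than -1.)
 */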
2333
2334 #elif defined(__i386)
2335
2336 #define ARG_UADDR 4
2337 #define ARG_KADDR 8
2338
2339 ENTRY(copyinstr)
2340 movl kernelbase, %ecx
2341 #ifdef DEBUG
2342 cmpl %ecx, ARG_KADDR(%esp)
2343 jnb 1f
2344 pushl %ebp
2345 movl %esp, %ebp
2346 pushl $.copyinstr_panic_msg
2347 call panic
2429 4:
2430 movq %gs:CPU_THREAD, %r9
2431 jmp 3f
2432
2433 _copyoutstr_error:
2434 SMAP_ENABLE_INSTR(9)
2435 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2436 3:
2437 movq T_COPYOPS(%r9), %rax
2438 cmpq $0, %rax
2439 jz 2f
2440
2441 /*
2442 * reload args for the copyop
2443 */
2444 movq (%rsp), %rdi
2445 movq 0x8(%rsp), %rsi
2446 movq 0x10(%rsp), %rdx
2447 movq 0x18(%rsp), %rcx
2448 leave
2449 jmp *CP_COPYOUTSTR(%rax)
2450
2451 2: movl $EFAULT, %eax /* return EFAULT */
2452 leave
2453 ret
2454 SET_SIZE(copyoutstr)
2455
2456 #elif defined(__i386)
2457
2458 #define ARG_KADDR 4
2459 #define ARG_UADDR 8
2460
2461 ENTRY(copyoutstr)
2462 movl kernelbase, %ecx
2463 #ifdef DEBUG
2464 cmpl %ecx, ARG_KADDR(%esp)
2465 jnb 1f
2466 pushl %ebp
2467 movl %esp, %ebp
2468 pushl $.copyoutstr_panic_msg
2469 call panic
2545 ENTRY(NAME) \
2546 movq %gs:CPU_THREAD, %r9; \
2547 cmpq kernelbase(%rip), %rdi; \
2548 jae 1f; \
2549 leaq _flt_/**/NAME, %rdx; \
2550 movq %rdx, T_LOFAULT(%r9); \
2551 SMAP_DISABLE_INSTR(DISNUM) \
2552 INSTR (%rdi), REG; \
2553 movq $0, T_LOFAULT(%r9); \
2554 INSTR REG, (%rsi); \
2555 xorl %eax, %eax; \
2556 SMAP_ENABLE_INSTR(EN1) \
2557 ret; \
2558 _flt_/**/NAME: \
2559 SMAP_ENABLE_INSTR(EN2) \
2560 movq $0, T_LOFAULT(%r9); \
2561 1: \
2562 movq T_COPYOPS(%r9), %rax; \
2563 cmpq $0, %rax; \
2564 jz 2f; \
2565 jmp *COPYOP(%rax); \
2566 2: \
2567 movl $-1, %eax; \
2568 ret; \
2569 SET_SIZE(NAME)
2570
2571 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2572 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2573 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2574 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
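/*
 * Editorial sketch (not part of the original source): the FUWORD macro
 * points T_LOFAULT at a recovery label, performs the one guarded user
 * load, and clears T_LOFAULT again; if the load faults, the trap handler
 * resumes execution at that label instead of panicking.  A loose userland
 * analogy of that control flow using setjmp (this is only an analogy,
 * the kernel does not use setjmp, and all names are hypothetical):
 *
 *	#include <setjmp.h>
 *	#include <stdint.h>
 *
 *	static jmp_buf lofault;		// stand-in for curthread->t_lofault
 *
 *	static int
 *	fuword32_sketch(const uint32_t *uaddr, uint32_t *val)
 *	{
 *		if (setjmp(lofault) != 0)	// "trap handler" lands here
 *			return (-1);		// like _flt_fuword32
 *		*val = *uaddr;			// the single guarded load
 *		return (0);
 *	}
 */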
2575
2576 #elif defined(__i386)
2577
2578 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2579 ENTRY(NAME) \
2580 movl %gs:CPU_THREAD, %ecx; \
2581 movl kernelbase, %eax; \
2582 cmpl %eax, 4(%esp); \
2583 jae 1f; \
2584 lea _flt_/**/NAME, %edx; \
2585 movl %edx, T_LOFAULT(%ecx); \
2654 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2655 ENTRY(NAME) \
2656 movq %gs:CPU_THREAD, %r9; \
2657 cmpq kernelbase(%rip), %rdi; \
2658 jae 1f; \
2659 leaq _flt_/**/NAME, %rdx; \
2660 SMAP_DISABLE_INSTR(DISNUM) \
2661 movq %rdx, T_LOFAULT(%r9); \
2662 INSTR REG, (%rdi); \
2663 movq $0, T_LOFAULT(%r9); \
2664 xorl %eax, %eax; \
2665 SMAP_ENABLE_INSTR(EN1) \
2666 ret; \
2667 _flt_/**/NAME: \
2668 SMAP_ENABLE_INSTR(EN2) \
2669 movq $0, T_LOFAULT(%r9); \
2670 1: \
2671 movq T_COPYOPS(%r9), %rax; \
2672 cmpq $0, %rax; \
2673 jz 3f; \
2674 jmp *COPYOP(%rax); \
2675 3: \
2676 movl $-1, %eax; \
2677 ret; \
2678 SET_SIZE(NAME)
2679
2680 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2681 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2682 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2683 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
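/*
 * Editorial sketch (not part of the original source): a hedged usage
 * example for the store side; the prototype is declared locally for the
 * sketch and the error handling is illustrative only.
 *
 *	#include <stdint.h>
 *
 *	extern int suword32(void *uaddr, uint32_t value);
 *
 *	static int
 *	post_flag(void *uaddr)
 *	{
 *		// -1 means the store faulted (or the installed copyop
 *		// refused it); 0 means the word reached user space.
 *		if (suword32(uaddr, 1) != 0)
 *			return (-1);
 *		return (0);
 *	}
 */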
2684
2685 #elif defined(__i386)
2686
2687 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2688 ENTRY(NAME) \
2689 movl %gs:CPU_THREAD, %ecx; \
2690 movl kernelbase, %eax; \
2691 cmpl %eax, 4(%esp); \
2692 jae 1f; \
2693 lea _flt_/**/NAME, %edx; \
2694 movl %edx, T_LOFAULT(%ecx); \
|
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2009, Intel Corporation
28 * All rights reserved.
29 */
30
31 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
32 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
33 /* All Rights Reserved */
34
35 /* Copyright (c) 1987, 1988 Microsoft Corporation */
36 /* All Rights Reserved */
37
38 /*
39 * Copyright 2019 Joyent, Inc.
40 */
41
42 #include <sys/errno.h>
43 #include <sys/asm_linkage.h>
44
45 #if defined(__lint)
46 #include <sys/types.h>
47 #include <sys/systm.h>
48 #else /* __lint */
49 #include "assym.h"
50 #endif /* __lint */
51
52 #define KCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
53 #define XCOPY_MIN_SIZE 128 /* Must be >= 16 bytes */
54 /*
55 * Non-temporal access (NTA) alignment requirement
56 */
57 #define NTA_ALIGN_SIZE 4 /* Must be at least 4-byte aligned */
58 #define NTA_ALIGN_MASK _CONST(NTA_ALIGN_SIZE-1)
59 #define COUNT_ALIGN_SIZE 16 /* Must be at least 16-byte aligned */
465 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
466 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
467 * uses these registers in the future, they must be saved and restored.
468 */
469 ALTENTRY(bcopy_altentry)
470 do_copy:
471 #define L(s) .bcopy/**/s
472 cmpq $0x50, %rdx /* 80 */
473 jae bcopy_ck_size
474
475 /*
476 * Performance data shows many callers copy small buffers, so for the
477 * best performance at these sizes, unrolled code is used. Store data
478 * without worrying about alignment.
479 */
480 leaq L(fwdPxQx)(%rip), %r10
481 addq %rdx, %rdi
482 addq %rdx, %rsi
483 movslq (%r10,%rdx,4), %rcx
484 leaq (%rcx,%r10,1), %r10
485 INDIRECT_JMP_REG(r10)
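/*
 * Editorial note: in this newer version the raw "jmpq *%r10" has been
 * replaced by the INDIRECT_JMP_REG() macro (presumably provided by the
 * asm_linkage definitions included above) so the build can emit a
 * Spectre-v2-safe, retpoline-style sequence for the register-indirect
 * jump.  For illustration only (the exact expansion of the macro may
 * differ), a classic retpoline thunk for %r10 looks like:
 *
 *		__x86_indirect_thunk_r10:
 *			call	2f
 *		1:	pause
 *			lfence
 *			jmp	1b
 *		2:	movq	%r10, (%rsp)
 *			ret
 *
 * which keeps the speculative path spinning harmlessly at label 1 while
 * the architectural path returns through the real target in %r10.
 */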
486
487 .p2align 4
488 L(fwdPxQx):
489 .int L(P0Q0)-L(fwdPxQx) /* 0 */
490 .int L(P1Q0)-L(fwdPxQx)
491 .int L(P2Q0)-L(fwdPxQx)
492 .int L(P3Q0)-L(fwdPxQx)
493 .int L(P4Q0)-L(fwdPxQx)
494 .int L(P5Q0)-L(fwdPxQx)
495 .int L(P6Q0)-L(fwdPxQx)
496 .int L(P7Q0)-L(fwdPxQx)
497
498 .int L(P0Q1)-L(fwdPxQx) /* 8 */
499 .int L(P1Q1)-L(fwdPxQx)
500 .int L(P2Q1)-L(fwdPxQx)
501 .int L(P3Q1)-L(fwdPxQx)
502 .int L(P4Q1)-L(fwdPxQx)
503 .int L(P5Q1)-L(fwdPxQx)
504 .int L(P6Q1)-L(fwdPxQx)
505 .int L(P7Q1)-L(fwdPxQx)
921 mov 0x28(%rdi), %rcx
922 mov %r10, 0x20(%rsi)
923 mov %rcx, 0x28(%rsi)
924 mov 0x30(%rdi), %r8
925 mov 0x38(%rdi), %r10
926 lea 0x40(%rdi), %rdi
927 mov %r8, 0x30(%rsi)
928 mov %r10, 0x38(%rsi)
929 lea 0x40(%rsi), %rsi
930 jae L(aligned_loop)
931
932 /*
933 * Copy remaining bytes (0-63)
934 */
935 L(do_remainder):
936 leaq L(fwdPxQx)(%rip), %r10
937 addq %rdx, %rdi
938 addq %rdx, %rsi
939 movslq (%r10,%rdx,4), %rcx
940 leaq (%rcx,%r10,1), %r10
941 INDIRECT_JMP_REG(r10)
942
943 /*
944 * Use rep smovq. Copy the remainder via unrolled code
945 */
946 .p2align 4
947 L(use_rep):
948 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */
949 movq %rdx, %rcx /* %rcx = count */
950 shrq $3, %rcx /* 8-byte word count */
951 rep
952 smovq
953
954 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */
955 andq $7, %rdx /* remainder */
956 jnz L(do_remainder)
957 ret
958 #undef L
959 SET_SIZE(bcopy_ck_size)
960
961 #ifdef DEBUG
1153 jmp call_panic /* setup stack and call panic */
1154 0:
1155 #endif
1156 ALTENTRY(bzero_altentry)
1157 do_zero:
1158 #define L(s) .bzero/**/s
1159 xorl %eax, %eax
1160
1161 cmpq $0x50, %rsi /* 80 */
1162 jae L(ck_align)
1163
1164 /*
1165 * Performance data shows many callers zero small buffers, so for the
1166 * best performance at these sizes, unrolled code is used. Store zeros
1167 * without worrying about alignment.
1168 */
1169 leaq L(setPxQx)(%rip), %r10
1170 addq %rsi, %rdi
1171 movslq (%r10,%rsi,4), %rcx
1172 leaq (%rcx,%r10,1), %r10
1173 INDIRECT_JMP_REG(r10)
1174
1175 .p2align 4
1176 L(setPxQx):
1177 .int L(P0Q0)-L(setPxQx) /* 0 */
1178 .int L(P1Q0)-L(setPxQx)
1179 .int L(P2Q0)-L(setPxQx)
1180 .int L(P3Q0)-L(setPxQx)
1181 .int L(P4Q0)-L(setPxQx)
1182 .int L(P5Q0)-L(setPxQx)
1183 .int L(P6Q0)-L(setPxQx)
1184 .int L(P7Q0)-L(setPxQx)
1185
1186 .int L(P0Q1)-L(setPxQx) /* 8 */
1187 .int L(P1Q1)-L(setPxQx)
1188 .int L(P2Q1)-L(setPxQx)
1189 .int L(P3Q1)-L(setPxQx)
1190 .int L(P4Q1)-L(setPxQx)
1191 .int L(P5Q1)-L(setPxQx)
1192 .int L(P6Q1)-L(setPxQx)
1193 .int L(P7Q1)-L(setPxQx)
1424 cmpq $0x40, %rsi
1425 movq %rax, (%rdi)
1426 movq %rax, 0x8(%rdi)
1427 movq %rax, 0x10(%rdi)
1428 movq %rax, 0x18(%rdi)
1429 movq %rax, 0x20(%rdi)
1430 movq %rax, 0x28(%rdi)
1431 movq %rax, 0x30(%rdi)
1432 movq %rax, 0x38(%rdi)
1433 leaq 0x40(%rdi), %rdi
1434 jae L(bzero_loop)
1435
1436 /*
1437 * Clear any remaining bytes.
1438 */
1439 9:
1440 leaq L(setPxQx)(%rip), %r10
1441 addq %rsi, %rdi
1442 movslq (%r10,%rsi,4), %rcx
1443 leaq (%rcx,%r10,1), %r10
1444 INDIRECT_JMP_REG(r10)
1445
1446 /*
1447 * Use rep sstoq. Clear any remainder via unrolled code
1448 */
1449 .p2align 4
1450 L(use_rep):
1451 movq %rsi, %rcx /* get size in bytes */
1452 shrq $3, %rcx /* count of 8-byte words to zero */
1453 rep
1454 sstoq /* %rcx = words to clear (%rax=0) */
1455 andq $7, %rsi /* remaining bytes */
1456 jnz 9b
1457 ret
1458 #undef L
1459 SET_SIZE(bzero_altentry)
1460 SET_SIZE(bzero)
1461
1462 #elif defined(__i386)
1463
1464 #define ARG_ADDR 4
1560 cmpq %rax, %rdi /* test uaddr < kernelbase */
1561 jae 3f /* take copyop if uaddr >= kernelbase */
1562 SMAP_DISABLE_INSTR(0)
1563 jmp do_copy_fault /* Takes care of leave for us */
1564
1565 _copyin_err:
1566 SMAP_ENABLE_INSTR(2)
1567 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1568 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1569 3:
1570 movq T_COPYOPS(%r9), %rax
1571 cmpq $0, %rax
1572 jz 2f
1573 /*
1574 * reload args for the copyop
1575 */
1576 movq (%rsp), %rdi
1577 movq 0x8(%rsp), %rsi
1578 movq 0x10(%rsp), %rdx
1579 leave
1580 movq CP_COPYIN(%rax), %rax
1581 INDIRECT_JMP_REG(rax)
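/*
 * Editorial note: the older code jumped straight through memory with
 * "jmp *CP_COPYIN(%rax)"; a retpoline-style thunk needs its target in a
 * register, so the pointer is now loaded first and then dispatched with
 * INDIRECT_JMP_REG().  The same load-then-jump pattern replaces every
 * copyops dispatch below.
 */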
1582
1583 2: movl $-1, %eax
1584 leave
1585 ret
1586 SET_SIZE(copyin)
1587
1588 #elif defined(__i386)
1589
1590 #define ARG_UADDR 4
1591 #define ARG_KADDR 8
1592
1593 ENTRY(copyin)
1594 movl kernelbase, %ecx
1595 #ifdef DEBUG
1596 cmpl %ecx, ARG_KADDR(%esp)
1597 jnb 1f
1598 pushl %ebp
1599 movl %esp, %ebp
1600 pushl $.copyin_panic_msg
1601 call panic
1706 * indicated through an errno value in %rax and we iret from the
1707 * trap handler to here.
1708 */
1709 _xcopyin_err:
1710 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1711 _xcopyin_nta_err:
1712 SMAP_ENABLE_INSTR(3)
1713 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1714 3:
1715 movq T_COPYOPS(%r9), %r8
1716 cmpq $0, %r8
1717 jz 2f
1718
1719 /*
1720 * reload args for the copyop
1721 */
1722 movq (%rsp), %rdi
1723 movq 0x8(%rsp), %rsi
1724 movq 0x10(%rsp), %rdx
1725 leave
1726 movq CP_XCOPYIN(%r8), %r8
1727 INDIRECT_JMP_REG(r8)
1728
1729 2: leave
1730 ret
1731 SET_SIZE(xcopyin_nta)
1732
1733 #elif defined(__i386)
1734
1735 #define ARG_UADDR 4
1736 #define ARG_KADDR 8
1737 #define ARG_COUNT 12
1738 #define ARG_CACHED 16
1739
1740 .globl use_sse_copy
1741
1742 ENTRY(xcopyin_nta)
1743 movl kernelbase, %ecx
1744 lea _xcopyin_err, %eax
1745 movl %gs:CPU_THREAD, %edx
1746 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
1747 jae 4f
1850 jae 3f /* take copyop if uaddr >= kernelbase */
1851 SMAP_DISABLE_INSTR(3)
1852 jmp do_copy_fault /* Calls leave for us */
1853
1854 _copyout_err:
1855 SMAP_ENABLE_INSTR(4)
1856 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
1857 addq $8, %rsp /* pop bcopy_altentry call ret addr */
1858 3:
1859 movq T_COPYOPS(%r9), %rax
1860 cmpq $0, %rax
1861 jz 2f
1862
1863 /*
1864 * reload args for the copyop
1865 */
1866 movq (%rsp), %rdi
1867 movq 0x8(%rsp), %rsi
1868 movq 0x10(%rsp), %rdx
1869 leave
1870 movq CP_COPYOUT(%rax), %rax
1871 INDIRECT_JMP_REG(rax)
1872
1873 2: movl $-1, %eax
1874 leave
1875 ret
1876 SET_SIZE(copyout)
1877
1878 #elif defined(__i386)
1879
1880 #define ARG_KADDR 4
1881 #define ARG_UADDR 8
1882
1883 ENTRY(copyout)
1884 movl kernelbase, %ecx
1885 #ifdef DEBUG
1886 cmpl %ecx, ARG_KADDR(%esp)
1887 jnb 1f
1888 pushl %ebp
1889 movl %esp, %ebp
1890 pushl $.copyout_panic_msg
1891 call panic
1996 * indicated through an errno value in %rax and we iret from the
1997 * trap handler to here.
1998 */
1999 _xcopyout_err:
2000 addq $8, %rsp /* pop bcopy_altentry call ret addr */
2001 _xcopyout_nta_err:
2002 SMAP_ENABLE_INSTR(6)
2003 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2004 3:
2005 movq T_COPYOPS(%r9), %r8
2006 cmpq $0, %r8
2007 jz 2f
2008
2009 /*
2010 * reload args for the copyop
2011 */
2012 movq (%rsp), %rdi
2013 movq 0x8(%rsp), %rsi
2014 movq 0x10(%rsp), %rdx
2015 leave
2016 movq CP_XCOPYOUT(%r8), %r8
2017 INDIRECT_JMP_REG(r8)
2018
2019 2: leave
2020 ret
2021 SET_SIZE(xcopyout_nta)
2022
2023 #elif defined(__i386)
2024
2025 #define ARG_KADDR 4
2026 #define ARG_UADDR 8
2027 #define ARG_COUNT 12
2028 #define ARG_CACHED 16
2029
2030 ENTRY(xcopyout_nta)
2031 movl kernelbase, %ecx
2032 lea _xcopyout_err, %eax
2033 movl %gs:CPU_THREAD, %edx
2034 cmpl %ecx, ARG_UADDR(%esp) /* test uaddr < kernelbase */
2035 jae 4f
2036
2037 cmpl $0, use_sse_copy /* no sse support */
2311 4:
2312 movq %gs:CPU_THREAD, %r9
2313 jmp 3f
2314
2315 _copyinstr_error:
2316 SMAP_ENABLE_INSTR(8)
2317 movq %r11, T_LOFAULT(%r9) /* restore original lofault */
2318 3:
2319 movq T_COPYOPS(%r9), %rax
2320 cmpq $0, %rax
2321 jz 2f
2322
2323 /*
2324 * reload args for the copyop
2325 */
2326 movq (%rsp), %rdi
2327 movq 0x8(%rsp), %rsi
2328 movq 0x10(%rsp), %rdx
2329 movq 0x18(%rsp), %rcx
2330 leave
2331 movq CP_COPYINSTR(%rax), %rax
2332 INDIRECT_JMP_REG(rax)
2333
2334 2: movl $EFAULT, %eax /* return EFAULT */
2335 leave
2336 ret
2337 SET_SIZE(copyinstr)
2338
2339 #elif defined(__i386)
2340
2341 #define ARG_UADDR 4
2342 #define ARG_KADDR 8
2343
2344 ENTRY(copyinstr)
2345 movl kernelbase, %ecx
2346 #ifdef DEBUG
2347 cmpl %ecx, ARG_KADDR(%esp)
2348 jnb 1f
2349 pushl %ebp
2350 movl %esp, %ebp
2351 pushl $.copyinstr_panic_msg
2352 call panic
2434 4:
2435 movq %gs:CPU_THREAD, %r9
2436 jmp 3f
2437
2438 _copyoutstr_error:
2439 SMAP_ENABLE_INSTR(9)
2440 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */
2441 3:
2442 movq T_COPYOPS(%r9), %rax
2443 cmpq $0, %rax
2444 jz 2f
2445
2446 /*
2447 * reload args for the copyop
2448 */
2449 movq (%rsp), %rdi
2450 movq 0x8(%rsp), %rsi
2451 movq 0x10(%rsp), %rdx
2452 movq 0x18(%rsp), %rcx
2453 leave
2454 movq CP_COPYOUTSTR(%rax), %rax
2455 INDIRECT_JMP_REG(rax)
2456
2457 2: movl $EFAULT, %eax /* return EFAULT */
2458 leave
2459 ret
2460 SET_SIZE(copyoutstr)
2461
2462 #elif defined(__i386)
2463
2464 #define ARG_KADDR 4
2465 #define ARG_UADDR 8
2466
2467 ENTRY(copyoutstr)
2468 movl kernelbase, %ecx
2469 #ifdef DEBUG
2470 cmpl %ecx, ARG_KADDR(%esp)
2471 jnb 1f
2472 pushl %ebp
2473 movl %esp, %ebp
2474 pushl $.copyoutstr_panic_msg
2475 call panic
2551 ENTRY(NAME) \
2552 movq %gs:CPU_THREAD, %r9; \
2553 cmpq kernelbase(%rip), %rdi; \
2554 jae 1f; \
2555 leaq _flt_/**/NAME, %rdx; \
2556 movq %rdx, T_LOFAULT(%r9); \
2557 SMAP_DISABLE_INSTR(DISNUM) \
2558 INSTR (%rdi), REG; \
2559 movq $0, T_LOFAULT(%r9); \
2560 INSTR REG, (%rsi); \
2561 xorl %eax, %eax; \
2562 SMAP_ENABLE_INSTR(EN1) \
2563 ret; \
2564 _flt_/**/NAME: \
2565 SMAP_ENABLE_INSTR(EN2) \
2566 movq $0, T_LOFAULT(%r9); \
2567 1: \
2568 movq T_COPYOPS(%r9), %rax; \
2569 cmpq $0, %rax; \
2570 jz 2f; \
2571 movq COPYOP(%rax), %rax; \
2572 INDIRECT_JMP_REG(rax); \
2573 2: \
2574 movl $-1, %eax; \
2575 ret; \
2576 SET_SIZE(NAME)
2577
2578 FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
2579 FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
2580 FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
2581 FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
2582
2583 #elif defined(__i386)
2584
2585 #define FUWORD(NAME, INSTR, REG, COPYOP) \
2586 ENTRY(NAME) \
2587 movl %gs:CPU_THREAD, %ecx; \
2588 movl kernelbase, %eax; \
2589 cmpl %eax, 4(%esp); \
2590 jae 1f; \
2591 lea _flt_/**/NAME, %edx; \
2592 movl %edx, T_LOFAULT(%ecx); \
2661 #define SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2) \
2662 ENTRY(NAME) \
2663 movq %gs:CPU_THREAD, %r9; \
2664 cmpq kernelbase(%rip), %rdi; \
2665 jae 1f; \
2666 leaq _flt_/**/NAME, %rdx; \
2667 SMAP_DISABLE_INSTR(DISNUM) \
2668 movq %rdx, T_LOFAULT(%r9); \
2669 INSTR REG, (%rdi); \
2670 movq $0, T_LOFAULT(%r9); \
2671 xorl %eax, %eax; \
2672 SMAP_ENABLE_INSTR(EN1) \
2673 ret; \
2674 _flt_/**/NAME: \
2675 SMAP_ENABLE_INSTR(EN2) \
2676 movq $0, T_LOFAULT(%r9); \
2677 1: \
2678 movq T_COPYOPS(%r9), %rax; \
2679 cmpq $0, %rax; \
2680 jz 3f; \
2681 movq COPYOP(%rax), %rax; \
2682 INDIRECT_JMP_REG(rax); \
2683 3: \
2684 movl $-1, %eax; \
2685 ret; \
2686 SET_SIZE(NAME)
2687
2688 SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
2689 SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
2690 SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
2691 SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
2692
2693 #elif defined(__i386)
2694
2695 #define SUWORD(NAME, INSTR, REG, COPYOP) \
2696 ENTRY(NAME) \
2697 movl %gs:CPU_THREAD, %ecx; \
2698 movl kernelbase, %eax; \
2699 cmpl %eax, 4(%esp); \
2700 jae 1f; \
2701 lea _flt_/**/NAME, %edx; \
2702 movl %edx, T_LOFAULT(%ecx); \